diff --git a/cpp/ql/lib/change-notes/2026-01-19-parameterized-barrier-guard.md b/cpp/ql/lib/change-notes/2026-01-19-parameterized-barrier-guard.md new file mode 100644 index 000000000000..4f2d754c0b87 --- /dev/null +++ b/cpp/ql/lib/change-notes/2026-01-19-parameterized-barrier-guard.md @@ -0,0 +1,4 @@ +--- +category: feature +--- +* Added modules `DataFlow::ParameterizedBarrierGuard` and `DataFlow::ParameterizedInstructionBarrierGuard`. These modules provide the same features as `DataFlow::BarrierGuard` and `DataFlow::InstructionBarrierGuard`, but allow for an additional parameter to support properly using them in dataflow configurations that uses flow states. \ No newline at end of file diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll index 7232326f1b3d..eecacb148340 100644 --- a/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll +++ b/cpp/ql/lib/semmle/code/cpp/dataflow/ExternalFlow.qll @@ -95,6 +95,7 @@ import cpp private import new.DataFlow +private import semmle.code.cpp.controlflow.IRGuards private import semmle.code.cpp.ir.dataflow.internal.DataFlowPrivate as Private private import semmle.code.cpp.ir.dataflow.internal.DataFlowUtil private import internal.FlowSummaryImpl @@ -367,6 +368,8 @@ private predicate elementSpec( ) { sourceModel(namespace, type, subtypes, name, signature, ext, _, _, _, _) or sinkModel(namespace, type, subtypes, name, signature, ext, _, _, _, _) or + barrierModel(namespace, type, subtypes, name, signature, ext, _, _, _, _) or + barrierGuardModel(namespace, type, subtypes, name, signature, ext, _, _, _, _, _) or summaryModel(namespace, type, subtypes, name, signature, ext, _, _, _, _, _) } @@ -1028,6 +1031,84 @@ private module Cached { isSinkNode(n, kind, model) and n.asNode() = node ) } + + private newtype TKindModelPair = + TMkPair(string kind, string model) { isBarrierGuardNode(_, _, kind, model) } + + private GuardValue convertAcceptingValue(Public::AcceptingValue av) { + av.isTrue() and result.asBooleanValue() = true + or + av.isFalse() and result.asBooleanValue() = false + or + // NOTE: The below cases don't contribute anything currently since the + // callers immediately use `.asBooleanValue()` to convert the `GuardValue` + // to a boolean. Once we're willing to accept the breaking change of + // converting the barrier guard API to use `GuardValue`s instead `Boolean`s + // we can remove this restriction. + av.isNoException() and result.getDualValue().isThrowsException() + or + av.isZero() and result.asIntValue() = 0 + or + av.isNotZero() and result.getDualValue().asIntValue() = 0 + or + av.isNull() and result.isNullValue() + or + av.isNotNull() and result.isNonNullValue() + } + + private predicate barrierGuardChecks(IRGuardCondition g, Expr e, boolean gv, TKindModelPair kmp) { + exists( + SourceSinkInterpretationInput::InterpretNode n, Public::AcceptingValue acceptingvalue, + string kind, string model + | + isBarrierGuardNode(n, acceptingvalue, kind, model) and + n.asNode().asExpr() = e and + kmp = TMkPair(kind, model) and + gv = convertAcceptingValue(acceptingvalue).asBooleanValue() and + n.asNode().(Private::ArgumentNode).getCall().asCallInstruction() = g + ) + } + + private newtype TKindModelPairIntPair = + MkKindModelPairIntPair(TKindModelPair pair, int indirectionIndex) { + indirectionIndex > 0 and + Private::nodeHasInstruction(_, _, indirectionIndex) and + exists(pair) + } + + private predicate indirectBarrierGuardChecks( + IRGuardCondition g, Expr e, boolean gv, TKindModelPairIntPair kmp + ) { + exists( + SourceSinkInterpretationInput::InterpretNode interpretNode, + Public::AcceptingValue acceptingvalue, string kind, string model, int indirectionIndex, + Private::ArgumentNode arg + | + isBarrierGuardNode(interpretNode, acceptingvalue, kind, model) and + arg = interpretNode.asNode() and + arg.asIndirectExpr(indirectionIndex) = e and + kmp = MkKindModelPairIntPair(TMkPair(kind, model), indirectionIndex) and + gv = convertAcceptingValue(acceptingvalue).asBooleanValue() and + arg.getCall().asCallInstruction() = g + ) + } + + /** + * Holds if `node` is specified as a barrier with the given kind in a MaD flow + * model. + */ + cached + predicate barrierNode(DataFlow::Node node, string kind, string model) { + exists(SourceSinkInterpretationInput::InterpretNode n | + isBarrierNode(n, kind, model) and n.asNode() = node + ) + or + DataFlow::ParameterizedBarrierGuard::getABarrierNode(TMkPair(kind, + model)) = node + or + DataFlow::ParameterizedBarrierGuard::getAnIndirectBarrierNode(MkKindModelPairIntPair(TMkPair(kind, + model), _)) = node + } } import Cached @@ -1044,6 +1125,12 @@ predicate sourceNode(DataFlow::Node node, string kind) { sourceNode(node, kind, */ predicate sinkNode(DataFlow::Node node, string kind) { sinkNode(node, kind, _) } +/** + * Holds if `node` is specified as a barrier with the given kind in a MaD flow + * model. + */ +predicate barrierNode(DataFlow::Node node, string kind) { barrierNode(node, kind, _) } + private predicate interpretSummary( Function f, string input, string output, string kind, string provenance, string model ) { diff --git a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/FlowSummaryImpl.qll b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/FlowSummaryImpl.qll index a1d9dd86c400..3c4177dc8569 100644 --- a/cpp/ql/lib/semmle/code/cpp/dataflow/internal/FlowSummaryImpl.qll +++ b/cpp/ql/lib/semmle/code/cpp/dataflow/internal/FlowSummaryImpl.qll @@ -149,16 +149,27 @@ module SourceSinkInterpretationInput implements } predicate barrierElement( - Element n, string output, string kind, Public::Provenance provenance, string model + Element e, string output, string kind, Public::Provenance provenance, string model ) { - none() + exists( + string namespace, string type, boolean subtypes, string name, string signature, string ext + | + barrierModel(namespace, type, subtypes, name, signature, ext, output, kind, provenance, model) and + e = interpretElement(namespace, type, subtypes, name, signature, ext) + ) } predicate barrierGuardElement( - Element n, string input, Public::AcceptingValue acceptingvalue, string kind, + Element e, string input, Public::AcceptingValue acceptingvalue, string kind, Public::Provenance provenance, string model ) { - none() + exists( + string package, string type, boolean subtypes, string name, string signature, string ext + | + barrierGuardModel(package, type, subtypes, name, signature, ext, input, acceptingvalue, kind, + provenance, model) and + e = interpretElement(package, type, subtypes, name, signature, ext) + ) } private newtype TInterpretNode = diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll index 919a32d8a4b7..1c338d5a52d9 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/DataFlowUtil.qll @@ -2417,6 +2417,19 @@ class ContentSet instanceof Content { } } +private signature class ParamSig; + +private module WithParam { + /** + * Holds if the guard `g` validates the expression `e` upon evaluating to `branch`. + * + * The expression `e` is expected to be a syntactic part of the guard `g`. + * For example, the guard `g` might be a call `isSafe(x)` and the expression `e` + * the argument `x`. + */ + signature predicate guardChecksSig(IRGuardCondition g, Expr e, boolean branch, P param); +} + /** * Holds if the guard `g` validates the expression `e` upon evaluating to `branch`. * @@ -2438,7 +2451,7 @@ private predicate controls(IRGuardCondition g, Node n, boolean edge) { * This is expected to be used in `isBarrier`/`isSanitizer` definitions * in data flow and taint tracking. */ -module BarrierGuard { +module ParameterizedBarrierGuard::guardChecksSig/4 guardChecks> { bindingset[value, n] pragma[inline_late] private predicate convertedExprHasValueNumber(ValueNumber value, Node n) { @@ -2448,12 +2461,13 @@ module BarrierGuard { ) } - private predicate guardChecksNode(IRGuardCondition g, Node n, boolean branch) { - guardChecks(g, n.asOperand().getDef().getConvertedResultExpression(), branch) + private predicate guardChecksNode(IRGuardCondition g, Node n, boolean branch, P p) { + guardChecks(g, n.asOperand().getDef().getConvertedResultExpression(), branch, p) } /** - * Gets an expression node that is safely guarded by the given guard check. + * Gets an expression node that is safely guarded by the given guard check + * when the parameter is `p`. * * For example, given the following code: * ```cpp @@ -2484,19 +2498,27 @@ module BarrierGuard { * * NOTE: If an indirect expression is tracked, use `getAnIndirectBarrierNode` instead. */ - Node getABarrierNode() { + Node getABarrierNode(P p) { exists(IRGuardCondition g, ValueNumber value, boolean edge | convertedExprHasValueNumber(value, result) and guardChecks(g, - pragma[only_bind_into](value.getAnInstruction().getConvertedResultExpression()), edge) and + pragma[only_bind_into](value.getAnInstruction().getConvertedResultExpression()), edge, p) and controls(g, result, edge) ) or - result = SsaImpl::BarrierGuard::getABarrierNode() + result = SsaImpl::BarrierGuard::getABarrierNode(p) } /** - * Gets an indirect expression node that is safely guarded by the given guard check. + * Gets an expression node that is safely guarded by the given guard check. + * + * See `getABarrierNode/1` for examples. + */ + Node getABarrierNode() { result = getABarrierNode(_) } + + /** + * Gets an indirect expression node that is safely guarded by the given + * guard check with parameter `p`. * * For example, given the following code: * ```cpp @@ -2528,6 +2550,13 @@ module BarrierGuard { * * NOTE: If a non-indirect expression is tracked, use `getABarrierNode` instead. */ + Node getAnIndirectBarrierNode(P p) { result = getAnIndirectBarrierNode(_, p) } + + /** + * Gets an indirect expression node that is safely guarded by the given guard check. + * + * See `getAnIndirectBarrierNode/1` for examples. + */ Node getAnIndirectBarrierNode() { result = getAnIndirectBarrierNode(_) } bindingset[value, n] @@ -2542,10 +2571,10 @@ module BarrierGuard { } private predicate guardChecksIndirectNode( - IRGuardCondition g, Node n, boolean branch, int indirectionIndex + IRGuardCondition g, Node n, boolean branch, int indirectionIndex, P p ) { guardChecks(g, n.asIndirectOperand(indirectionIndex).getDef().getConvertedResultExpression(), - branch) + branch, p) } /** @@ -2582,19 +2611,44 @@ module BarrierGuard { * * NOTE: If a non-indirect expression is tracked, use `getABarrierNode` instead. */ - Node getAnIndirectBarrierNode(int indirectionIndex) { + Node getAnIndirectBarrierNode(int indirectionIndex, P p) { exists(IRGuardCondition g, ValueNumber value, boolean edge | indirectConvertedExprHasValueNumber(indirectionIndex, value, result) and guardChecks(g, - pragma[only_bind_into](value.getAnInstruction().getConvertedResultExpression()), edge) and + pragma[only_bind_into](value.getAnInstruction().getConvertedResultExpression()), edge, p) and controls(g, result, edge) ) or result = - SsaImpl::BarrierGuardWithIntParam::getABarrierNode(indirectionIndex) + SsaImpl::BarrierGuardWithIntParam::getABarrierNode(indirectionIndex, + p) } } +/** + * Provides a set of barrier nodes for a guard that validates an expression. + * + * This is expected to be used in `isBarrier`/`isSanitizer` definitions + * in data flow and taint tracking. + */ +module BarrierGuard { + private predicate guardChecks(IRGuardCondition g, Expr e, boolean branch, Unit unit) { + guardChecks(g, e, branch) and + exists(unit) + } + + import ParameterizedBarrierGuard +} + +private module InstrWithParam { + /** + * Holds if the guard `g` validates the instruction `instr` upon evaluating to `branch`. + */ + signature predicate instructionGuardChecksSig( + IRGuardCondition g, Instruction instr, boolean branch, P p + ); +} + /** * Holds if the guard `g` validates the instruction `instr` upon evaluating to `branch`. */ @@ -2606,7 +2660,9 @@ signature predicate instructionGuardChecksSig(IRGuardCondition g, Instruction in * This is expected to be used in `isBarrier`/`isSanitizer` definitions * in data flow and taint tracking. */ -module InstructionBarrierGuard { +module ParameterizedInstructionBarrierGuard< + ParamSig P, InstrWithParam

::instructionGuardChecksSig/4 instructionGuardChecks> +{ bindingset[value, n] pragma[inline_late] private predicate operandHasValueNumber(ValueNumber value, Node n) { @@ -2616,21 +2672,27 @@ module InstructionBarrierGuard::getABarrierNode() + result = SsaImpl::BarrierGuard::getABarrierNode(p) } + /** Gets a node that is safely guarded by the given guard check. */ + Node getABarrierNode() { result = getABarrierNode(_) } + bindingset[value, n] pragma[inline_late] private predicate indirectOperandHasValueNumber(ValueNumber value, int indirectionIndex, Node n) { @@ -2641,25 +2703,52 @@ module InstructionBarrierGuard::getABarrierNode(indirectionIndex) + SsaImpl::BarrierGuardWithIntParam::getABarrierNode(indirectionIndex, + p) } + + /** + * Gets an indirect node that is safely guarded by the given guard check + * with parameter `p`. + */ + Node getAnIndirectBarrierNode(P p) { result = getAnIndirectBarrierNode(_, p) } + + /** Gets an indirect node that is safely guarded by the given guard check. */ + Node getAnIndirectBarrierNode() { result = getAnIndirectBarrierNode(_) } +} + +/** + * Provides a set of barrier nodes for a guard that validates an instruction. + * + * This is expected to be used in `isBarrier`/`isSanitizer` definitions + * in data flow and taint tracking. + */ +module InstructionBarrierGuard { + private predicate instructionGuardChecks( + IRGuardCondition g, Instruction i, boolean branch, Unit unit + ) { + instructionGuardChecks(g, i, branch) and + exists(unit) + } + + import ParameterizedInstructionBarrierGuard } /** diff --git a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaImpl.qll b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaImpl.qll index 99f13a81725e..d4a80ff25c80 100644 --- a/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaImpl.qll +++ b/cpp/ql/lib/semmle/code/cpp/ir/dataflow/internal/SsaImpl.qll @@ -1035,13 +1035,23 @@ class SynthNode extends DataFlowIntegrationImpl::SsaNode { SynthNode() { not this.asDefinition() instanceof SsaImpl::WriteDefinition } } -signature predicate guardChecksNodeSig(IRGuards::IRGuardCondition g, Node e, boolean branch); +private signature class ParamSig; -signature predicate guardChecksNodeSig( - IRGuards::IRGuardCondition g, Node e, boolean branch, int indirectionIndex -); +private module ParamIntPair { + newtype TPair = MkPair(P p, int indirectionIndex) { nodeHasInstruction(_, _, indirectionIndex) } +} + +private module WithParam { + signature predicate guardChecksNodeSig(IRGuards::IRGuardCondition g, Node e, boolean gv, P param); +} + +private module IntWithParam { + signature predicate guardChecksNodeSig( + IRGuards::IRGuardCondition g, Node e, boolean gv, int indirectionIndex, P param + ); +} -module BarrierGuardWithIntParam { +module BarrierGuardWithIntParam::guardChecksNodeSig/5 guardChecksNode> { private predicate ssaDefReachesCertainUse(Definition def, UseImpl use) { exists(SourceVariable v, IRBlock bb, int i | use.hasIndexInBlock(bb, i, v) and @@ -1052,21 +1062,23 @@ module BarrierGuardWithIntParam { private predicate guardChecksInstr( IRGuards::Guards_v1::Guard g, IRGuards::GuardsInput::Expr instr, IRGuards::GuardValue gv, - int indirectionIndex + ParamIntPair

::TPair pair ) { - exists(Node node | + exists(Node node, int indirectionIndex, P p | + pair = ParamIntPair

::MkPair(p, indirectionIndex) and nodeHasInstruction(node, instr, indirectionIndex) and - guardChecksNode(g, node, gv.asBooleanValue(), indirectionIndex) + guardChecksNode(g, node, gv.asBooleanValue(), indirectionIndex, p) ) } private predicate guardChecksWithWrappers( DataFlowIntegrationInput::Guard g, SsaImpl::Definition def, IRGuards::GuardValue val, - int indirectionIndex + ParamIntPair

::MkPair pair ) { - exists(Instruction e | - IRGuards::Guards_v1::ParameterizedValidationWrapper::guardChecks(g, - e, val, indirectionIndex) + exists(Instruction e, int indirectionIndex | + IRGuards::Guards_v1::ParameterizedValidationWrapper::TPair, guardChecksInstr/4>::guardChecks(g, + e, val, pair) and + pair = ParamIntPair

::MkPair(_, indirectionIndex) | indirectionIndex = 0 and def.(Definition).getAUse().getDef() = e @@ -1075,18 +1087,19 @@ module BarrierGuardWithIntParam { ) } - Node getABarrierNode(int indirectionIndex) { + Node getABarrierNode(int indirectionIndex, P p) { // Only get the SynthNodes from the shared implementation, as the ExprNodes cannot // be matched on SourceVariable. result.(SsaSynthNode).getSynthNode() = - DataFlowIntegrationImpl::BarrierGuardDefWithState::getABarrierNode(indirectionIndex) + DataFlowIntegrationImpl::BarrierGuardDefWithState::MkPair, guardChecksWithWrappers/4>::getABarrierNode(ParamIntPair

::MkPair(p, + indirectionIndex)) or // Calculate the guarded UseImpls corresponding to ExprNodes directly. exists( DataFlowIntegrationInput::Guard g, IRGuards::GuardValue branch, Definition def, IRBlock bb | - guardChecksWithWrappers(g, def, branch, indirectionIndex) and exists(UseImpl use | + guardChecksWithWrappers(g, def, branch, ParamIntPair

::MkPair(p, indirectionIndex)) and ssaDefReachesCertainUse(def, use) and use.getBlock() = bb and DataFlowIntegrationInput::guardControlsBlock(g, bb, branch) and @@ -1096,15 +1109,16 @@ module BarrierGuardWithIntParam { } } -module BarrierGuard { +module BarrierGuard::guardChecksNodeSig/4 guardChecksNode> { private predicate guardChecksNode( - IRGuards::IRGuardCondition g, Node e, boolean branch, int indirectionIndex + IRGuards::IRGuardCondition g, Node e, boolean gv, int indirectionIndex, P p ) { - guardChecksNode(g, e, branch) and indirectionIndex = 0 + indirectionIndex = 0 and + guardChecksNode(g, e, gv, p) } - Node getABarrierNode() { - result = BarrierGuardWithIntParam::getABarrierNode(0) + Node getABarrierNode(P p) { + result = BarrierGuardWithIntParam::getABarrierNode(0, p) } } diff --git a/cpp/ql/test/library-tests/dataflow/ir-barrier-guards/test.cpp b/cpp/ql/test/library-tests/dataflow/ir-barrier-guards/test.cpp index 9d9a89c70087..6c4b45db48f1 100644 --- a/cpp/ql/test/library-tests/dataflow/ir-barrier-guards/test.cpp +++ b/cpp/ql/test/library-tests/dataflow/ir-barrier-guards/test.cpp @@ -12,4 +12,38 @@ void testCheckArgument(int p) { if (checkArgument(&p)) { sink(p); // $ barrier=glval indirect_barrier=int } +} + +int* get_clean_value(int* x) { return x; } +bool is_clean_value(int*); + +int* get_clean_pointer(int* x) { return x; } +bool is_clean_pointer(int*); + +void sink(int*); + +void test_mad(int x, int* p) { + { + if(is_clean_value(&x)) { + sink(x); // $ external=int + } + } + + { + if(is_clean_value(p)) { + sink(*p); // $ external=int + } + } + + { + if(is_clean_pointer(p)) { + sink(p); // $ external=int* + } + } + + { + if(is_clean_pointer(&x)) { + sink(x); // $ external=glval + } + } } \ No newline at end of file diff --git a/cpp/ql/test/library-tests/dataflow/ir-barrier-guards/test.ext.yml b/cpp/ql/test/library-tests/dataflow/ir-barrier-guards/test.ext.yml new file mode 100644 index 000000000000..8eeed7cd0b55 --- /dev/null +++ b/cpp/ql/test/library-tests/dataflow/ir-barrier-guards/test.ext.yml @@ -0,0 +1,13 @@ +extensions: + - addsTo: + pack: codeql/cpp-all + extensible: barrierModel + data: + - ["", "", False, "get_clean_pointer", "", "", "ReturnValue", "test-barrier", "manual"] + - ["", "", False, "get_clean_data", "", "", "ReturnValue[*]", "test-barrier", "manual"] + - addsTo: + pack: codeql/cpp-all + extensible: barrierGuardModel + data: + - ["", "", False, "is_clean_value", "", "", "Argument[*0]", "true", "test-barrier", "manual"] + - ["", "", False, "is_clean_pointer", "", "", "Argument[0]", "true", "test-barrier", "manual"] \ No newline at end of file diff --git a/cpp/ql/test/library-tests/dataflow/ir-barrier-guards/test.ql b/cpp/ql/test/library-tests/dataflow/ir-barrier-guards/test.ql index 5961da371228..8948c65e33c0 100644 --- a/cpp/ql/test/library-tests/dataflow/ir-barrier-guards/test.ql +++ b/cpp/ql/test/library-tests/dataflow/ir-barrier-guards/test.ql @@ -2,6 +2,7 @@ import cpp import semmle.code.cpp.dataflow.new.DataFlow import semmle.code.cpp.controlflow.IRGuards import utils.test.InlineExpectationsTest +import semmle.code.cpp.dataflow.ExternalFlow predicate instructionGuardChecks(IRGuardCondition gc, Instruction checked, boolean branch) { exists(CallInstruction call | @@ -27,18 +28,26 @@ predicate barrierGuard(DataFlow::Node node, string s) { else s = node.getType().toString().replaceAll(" ", "") } +predicate externalBarrierGuard(DataFlow::Node node, string s) { + barrierNode(node, "test-barrier") and + if node.isGLValue() + then s = "glval<" + node.getType().toString().replaceAll(" ", "") + ">" + else s = node.getType().toString().replaceAll(" ", "") +} + module Test implements TestSig { - string getARelevantTag() { result = ["barrier", "indirect_barrier"] } + string getARelevantTag() { result = ["barrier", "indirect_barrier", "external"] } predicate hasActualResult(Location location, string element, string tag, string value) { - exists(DataFlow::Node node, string s | - indirectBarrierGuard(node, s) and - value = s and + exists(DataFlow::Node node | + indirectBarrierGuard(node, value) and tag = "indirect_barrier" or - barrierGuard(node, s) and - value = s and + barrierGuard(node, value) and tag = "barrier" + or + externalBarrierGuard(node, value) and + tag = "external" | element = node.toString() and location = node.getLocation()