-
Notifications
You must be signed in to change notification settings - Fork 1.9k
Python: Add exception, reachability, and other kinds of modelling #21668
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Changes from 2 commits
e14d493
993311e
6efedb7
ec9e72e
3e7986a
205466d
ca59ca0
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -2159,3 +2159,288 @@ module DuckTyping { | |
| f.getADecorator().(Name).getId() = "property" | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Provides a class hierarchy for exception types, covering both builtin | ||
| * exceptions (from typeshed models) and user-defined exception classes. | ||
| */ | ||
| module ExceptionTypes { | ||
| private import semmle.python.ApiGraphs | ||
| private import semmle.python.frameworks.data.internal.ApiGraphModels | ||
|
|
||
| /** Holds if `name` is a builtin exception class name. */ | ||
| predicate builtinException(string name) { | ||
| typeModel("builtins.BaseException~Subclass", "builtins." + name, "") | ||
| } | ||
|
|
||
| /** Holds if builtin exception `sub` is a direct subclass of builtin exception `base`. */ | ||
| private predicate builtinExceptionSubclass(string base, string sub) { | ||
| typeModel("builtins." + base + "~Subclass", "builtins." + sub, "") | ||
| } | ||
|
|
||
| /** An exception type, either a builtin exception or a user-defined exception class. */ | ||
| newtype TExceptType = | ||
| /** A user-defined exception class. */ | ||
| TUserExceptType(Class c) or | ||
| /** A builtin exception class, identified by name. */ | ||
| TBuiltinExceptType(string name) { builtinException(name) } | ||
|
|
||
| /** An exception type, either a builtin exception or a user-defined exception class. */ | ||
| class ExceptType extends TExceptType { | ||
| /** Gets the name of this exception type. */ | ||
| string getName() { none() } | ||
|
|
||
| /** Gets a data-flow node that refers to this exception type. */ | ||
| DataFlow::Node getAUse() { none() } | ||
|
|
||
| /** Gets a direct superclass of this exception type. */ | ||
| ExceptType getADirectSuperclass() { none() } | ||
|
|
||
| /** Gets a string representation of this exception type. */ | ||
| string toString() { result = this.getName() } | ||
|
|
||
| /** Holds if this exception type may be raised at control flow node `r`. */ | ||
| predicate isRaisedAt(ControlFlowNode r) { | ||
| exists(Expr raised | | ||
| raised = r.getNode().(Raise).getRaised() and | ||
| this.getAUse().asExpr() in [raised, raised.(Call).getFunc()] | ||
| ) | ||
| or | ||
| exists(Function callee | | ||
| resolveCall(r, callee, _) and | ||
| this.isRaisedIn(callee) | ||
| ) | ||
| } | ||
|
|
||
| /** | ||
| * Holds if this exception type may be raised in function `f`, either | ||
| * directly via `raise` statements or transitively through calls to other functions. | ||
| */ | ||
| predicate isRaisedIn(Function f) { this.isRaisedAt(any(ControlFlowNode r | r.getScope() = f)) } | ||
|
|
||
| /** Holds if this exception type is handled by the `except` clause at `handler`. */ | ||
| predicate isHandledAt(ExceptFlowNode handler) { | ||
| exists(ExceptStmt ex, Expr typeExpr | ex = handler.getNode() | | ||
| ( | ||
| typeExpr = ex.getType() | ||
| or | ||
| typeExpr = ex.getType().(Tuple).getAnElt() | ||
| ) and | ||
| this.getAUse().asExpr() = typeExpr | ||
| ) | ||
| or | ||
| // A bare `except:` handles everything | ||
| not exists(handler.getNode().(ExceptStmt).getType()) and | ||
| this.(BuiltinExceptType).getName() = "BaseException" | ||
| } | ||
|
|
||
| /** | ||
| * Holds if this element is at the specified location. | ||
| * The location spans column `startColumn` of line `startLine` to | ||
| * column `endColumn` of line `endLine` in file `filepath`. | ||
| * For more information, see | ||
| * [Providing locations in CodeQL queries](https://codeql.github.com/docs/writing-codeql-queries/providing-locations-in-codeql-queries/). | ||
| */ | ||
| predicate hasLocationInfo( | ||
| string filePath, int startLine, int startColumn, int endLine, int endColumn | ||
| ) { | ||
| none() | ||
| } | ||
| } | ||
|
|
||
| /** A user-defined exception class. */ | ||
| class UserExceptType extends ExceptType, TUserExceptType { | ||
| Class cls; | ||
|
|
||
| UserExceptType() { this = TUserExceptType(cls) } | ||
|
|
||
| /** Gets the underlying class. */ | ||
| Class asClass() { result = cls } | ||
|
|
||
| override string getName() { result = cls.getName() } | ||
|
|
||
| override DataFlow::Node getAUse() { result = classTracker(cls) } | ||
|
|
||
| override ExceptType getADirectSuperclass() { | ||
| result.(UserExceptType).asClass() = getADirectSuperclass(cls) | ||
| or | ||
| result.(BuiltinExceptType).getAUse().asExpr() = cls.getABase() | ||
| } | ||
|
|
||
| override predicate hasLocationInfo( | ||
| string filePath, int startLine, int startColumn, int endLine, int endColumn | ||
| ) { | ||
| cls.getLocation().hasLocationInfo(filePath, startLine, startColumn, endLine, endColumn) | ||
| } | ||
| } | ||
|
|
||
| /** A builtin exception class, identified by name. */ | ||
| class BuiltinExceptType extends ExceptType, TBuiltinExceptType { | ||
| string name; | ||
|
|
||
| BuiltinExceptType() { this = TBuiltinExceptType(name) } | ||
|
|
||
| /** Gets the builtin name. */ | ||
| string asBuiltinName() { result = name } | ||
|
|
||
| override string getName() { result = name } | ||
|
|
||
| override DataFlow::Node getAUse() { API::builtin(name).asSource().flowsTo(result) } | ||
|
|
||
| override ExceptType getADirectSuperclass() { | ||
| builtinExceptionSubclass(result.(BuiltinExceptType).asBuiltinName(), name) and | ||
| result != this | ||
| } | ||
|
|
||
| override predicate hasLocationInfo( | ||
| string filePath, int startLine, int startColumn, int endLine, int endColumn | ||
| ) { | ||
| filePath = "" and | ||
| startLine = 0 and | ||
| startColumn = 0 and | ||
| endLine = 0 and | ||
| endColumn = 0 | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Holds if the exception edge from `r` to `handler` is unlikely because | ||
| * none of the exception types that `r` may raise are handled by `handler`. | ||
| */ | ||
| predicate unlikelyExceptionEdge(ControlFlowNode r, ExceptFlowNode handler) { | ||
| handler = r.getAnExceptionalSuccessor() and | ||
| // We can determine at least one raised type | ||
| exists(ExceptType t | t.isRaisedAt(r)) and | ||
| // But none of them are handled by this handler | ||
| not exists(ExceptType raised, ExceptType handled | | ||
| raised.isRaisedAt(r) and | ||
| handled.isHandledAt(handler) and | ||
| raised.getADirectSuperclass*() = handled | ||
| ) | ||
| } | ||
| } | ||
|
|
||
| /** | ||
| * Provides predicates for reasoning about the reachability of control flow nodes | ||
| * and basic blocks. | ||
| */ | ||
| module Reachability { | ||
| private import semmle.python.ApiGraphs | ||
| import ExceptionTypes | ||
|
|
||
| /** | ||
| * Holds if `call` is a call to a function that is known to never return normally | ||
| * (e.g. `sys.exit()`, `os._exit()`, `os.abort()`). | ||
| */ | ||
| predicate isCallToNeverReturningFunction(CallNode call) { | ||
| // Known never-returning builtins/stdlib functions via API graphs | ||
| call = API::builtin("exit").getACall().asCfgNode() | ||
| or | ||
| call = API::builtin("quit").getACall().asCfgNode() | ||
| or | ||
| call = API::moduleImport("sys").getMember("exit").getACall().asCfgNode() | ||
| or | ||
| call = API::moduleImport("os").getMember("_exit").getACall().asCfgNode() | ||
| or | ||
| call = API::moduleImport("os").getMember("abort").getACall().asCfgNode() | ||
| or | ||
| // User-defined functions that only contain raise statements (no normal returns) | ||
| exists(Function target | | ||
| resolveCall(call, target, _) and | ||
| neverReturns(target) | ||
| ) | ||
| } | ||
|
|
||
| /** | ||
| * Holds if function `f` never returns normally, because every normal exit | ||
| * is dominated by a call to a never-returning function or an unconditional raise. | ||
| */ | ||
| predicate neverReturns(Function f) { | ||
| exists(f.getANormalExit()) and | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Why is this conjunct included? If it is to keep the predicate small, then it seems to have a more specialized context than its name and doc suggests. |
||
| forall(BasicBlock exit | exit = f.getANormalExit().getBasicBlock() | | ||
| exists(BasicBlock raising | | ||
| raising.dominates(exit) and | ||
| ( | ||
| isCallToNeverReturningFunction(raising.getLastNode()) | ||
| or | ||
| raising.getLastNode().getNode() instanceof Raise | ||
| ) | ||
| ) | ||
| ) | ||
| } | ||
|
|
||
| /** | ||
| * Holds if `node` is unlikely to raise an exception. This includes entry nodes | ||
| * and simple name lookups. | ||
| */ | ||
| private predicate unlikelyToRaise(ControlFlowNode node) { | ||
| exists(node.getAnExceptionalSuccessor()) and | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I expect this is here to keep the predicate small. |
||
| ( | ||
| node.getNode() instanceof Name | ||
| or | ||
| exists(Scope s | s.getEntryNode() = node) | ||
| ) | ||
| } | ||
|
|
||
| /** | ||
| * Holds if it is highly unlikely for control to flow from `node` to `succ`. | ||
| */ | ||
| predicate unlikelySuccessor(ControlFlowNode node, ControlFlowNode succ) { | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. Perhaps the doc should make clear that an |
||
| // Exceptional edge where the raised type doesn't match the handler | ||
| unlikelyExceptionEdge(node, succ) | ||
| or | ||
| // Normal successor of a never-returning call | ||
| isCallToNeverReturningFunction(node) and | ||
| succ = node.getASuccessor() and | ||
| not succ = node.getAnExceptionalSuccessor() and | ||
| not succ.getNode() instanceof Yield | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. How does |
||
| or | ||
| // Exception edge from a node that is unlikely to raise | ||
| unlikelyToRaise(node) and | ||
| succ = node.getAnExceptionalSuccessor() | ||
| } | ||
|
|
||
| private predicate startBbLikelyReachable(BasicBlock b) { | ||
| exists(Scope s | s.getEntryNode() = b.getNode(_)) | ||
| or | ||
| exists(BasicBlock pred | | ||
| pred = b.getAPredecessor() and | ||
| endBbLikelyReachable(pred) and | ||
| not unlikelySuccessor(pred.getLastNode(), b) | ||
| ) | ||
| } | ||
|
|
||
| private predicate endBbLikelyReachable(BasicBlock b) { | ||
| startBbLikelyReachable(b) and | ||
| not exists(ControlFlowNode p, ControlFlowNode s | | ||
| unlikelySuccessor(p, s) and | ||
| p = b.getNode(_) and | ||
| s = b.getNode(_) and | ||
| not p = b.getLastNode() | ||
| ) | ||
| } | ||
|
|
||
| /** | ||
| * Holds if basic block `b` is likely to be reachable from the entry of its | ||
| * enclosing scope. | ||
| */ | ||
| predicate likelyReachable(BasicBlock b) { startBbLikelyReachable(b) } | ||
|
Contributor
There was a problem hiding this comment. Choose a reason for hiding this commentThe reason will be displayed to describe this comment to others. Learn more. I would have expected us to export a predicate on control flow nodes here.. |
||
|
|
||
| /** | ||
| * Holds if it is unlikely that `node` can be reached during execution. | ||
| */ | ||
| predicate unlikelyReachable(ControlFlowNode node) { | ||
| not startBbLikelyReachable(node.getBasicBlock()) | ||
| or | ||
| exists(BasicBlock b | | ||
| startBbLikelyReachable(b) and | ||
| not endBbLikelyReachable(b) and | ||
| exists(ControlFlowNode p, int i, int j | | ||
| unlikelySuccessor(p, _) and | ||
| p = b.getNode(i) and | ||
| node = b.getNode(j) and | ||
| i < j | ||
| ) | ||
| ) | ||
| } | ||
| } | ||
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
I know this is effectively what was there before, but would it be possible to not have all classes here?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
As in: would you prefer that these were moved to a separate file? If not, then I'm not sure what you're asking.
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Oh, I think I understand what you're saying now: could we restrict
clsto be a class that inherits from a built-in exception (since only these are actually valid in araise)?The answer is "I guess?", but I'm not sure it would make much of a difference. It would also make
isLegalExceptionTypepointless (and make it more complicated to check when people try to raise something that isn't a valid exception).Do you have a specific reason to want to have fewer classes in this type?