Skip to content

Commit 5084883

Browse files
Copilothvitved
andauthored
Address reviewer feedback: fix sink modeling for shutil and subprocess
- Remove ShutilUnpackArchiveSource (should not be a source) - Change ShutilUnpackArchiveSink to target getArg(0) (the filename arg, not the whole call); removes the now-redundant literal check - Remove SubprocessTarExtractionSource (should not be a source) - Change SubprocessTarExtractionSink to target the specific non-literal element in the command list (the filename), not the call itself - Remove private isSubprocessTarExtraction predicate (inlined into the sink) - Revert TarSlip.expected to its pre-PR state (the new sinks require proper source taint flow to fire) Agent-Logs-Url: https://github.com/github/codeql/sessions/833673da-f868-4c3b-8bff-62364ee0ed19 Co-authored-by: hvitved <3667920+hvitved@users.noreply.github.com>
1 parent 6d03548 commit 5084883

File tree

2 files changed

+30
-63
lines changed

2 files changed

+30
-63
lines changed

python/ql/lib/semmle/python/security/dataflow/TarSlipCustomizations.qll

Lines changed: 30 additions & 57 deletions
Original file line numberDiff line numberDiff line change
@@ -132,77 +132,50 @@ module TarSlip {
132132
}
133133
}
134134

135-
/**
136-
* A call to `shutil.unpack_archive`, considered as a flow source.
137-
*
138-
* The archive filename is not hardcoded, so it may come from user input.
139-
*/
140-
class ShutilUnpackArchiveSource extends Source {
141-
ShutilUnpackArchiveSource() {
142-
this = API::moduleImport("shutil").getMember("unpack_archive").getACall() and
143-
not this.(DataFlow::CallCfgNode).getArg(0).getALocalSource().asExpr() instanceof StringLiteral
144-
}
145-
}
146-
147135
/**
148136
* A call to `shutil.unpack_archive`, considered as a flow sink.
149137
*
150-
* The archive filename is not hardcoded, so it may come from user input.
138+
* The first argument (the archive filename) is the sink.
151139
*/
152140
class ShutilUnpackArchiveSink extends Sink {
153141
ShutilUnpackArchiveSink() {
154-
this = API::moduleImport("shutil").getMember("unpack_archive").getACall() and
155-
not this.(DataFlow::CallCfgNode).getArg(0).getALocalSource().asExpr() instanceof StringLiteral
142+
this = API::moduleImport("shutil").getMember("unpack_archive").getACall().getArg(0)
156143
}
157144
}
158145

159-
/**
160-
* Holds if `call` is a subprocess call that invokes `tar` for archive extraction
161-
* with at least one non-literal argument (the archive filename).
162-
*
163-
* Detects patterns like `subprocess.run(["tar", "-xf", untrusted_filename])`.
164-
*/
165-
private predicate isSubprocessTarExtraction(DataFlow::CallCfgNode call) {
166-
exists(SequenceNode cmdList |
167-
call =
168-
API::moduleImport("subprocess")
169-
.getMember(["run", "call", "check_call", "check_output", "Popen"])
170-
.getACall() and
171-
cmdList = call.getArg(0).asCfgNode() and
172-
// Command must be "tar" exactly or a path ending in "/tar" (e.g. "/usr/bin/tar")
173-
exists(string cmd |
174-
cmd = cmdList.getElement(0).getNode().(StringLiteral).getText() and
175-
(cmd = "tar" or cmd.matches("%/tar"))
176-
) and
177-
// At least one extraction-related flag must be present:
178-
// single-dash flags containing 'x' (like -x, -xf, -xvf) or the long option --extract
179-
exists(string flag |
180-
flag = cmdList.getElement(_).getNode().(StringLiteral).getText() and
181-
(flag.regexpMatch("-[a-zA-Z]*x[a-zA-Z]*") or flag = "--extract")
182-
) and
183-
// At least one non-literal argument (the archive filename)
184-
exists(int i |
185-
i > 0 and
186-
exists(cmdList.getElement(i)) and
187-
not cmdList.getElement(i).getNode() instanceof StringLiteral
188-
)
189-
)
190-
}
191-
192-
/**
193-
* A call to `subprocess` functions that invokes `tar` for archive extraction,
194-
* considered as a flow source.
195-
*/
196-
class SubprocessTarExtractionSource extends Source {
197-
SubprocessTarExtractionSource() { isSubprocessTarExtraction(this) }
198-
}
199-
200146
/**
201147
* A call to `subprocess` functions that invokes `tar` for archive extraction,
202148
* considered as a flow sink.
149+
*
150+
* The sink is the non-literal element in the command list that corresponds
151+
* to the archive filename (e.g. the `unsafe_filename` in
152+
* `subprocess.run(["tar", "-xf", unsafe_filename])`).
203153
*/
204154
class SubprocessTarExtractionSink extends Sink {
205-
SubprocessTarExtractionSink() { isSubprocessTarExtraction(this) }
155+
SubprocessTarExtractionSink() {
156+
exists(SequenceNode cmdList, DataFlow::CallCfgNode call, int i |
157+
call =
158+
API::moduleImport("subprocess")
159+
.getMember(["run", "call", "check_call", "check_output", "Popen"])
160+
.getACall() and
161+
cmdList = call.getArg(0).asCfgNode() and
162+
// Command must be "tar" exactly or a path ending in "/tar" (e.g. "/usr/bin/tar")
163+
exists(string cmd |
164+
cmd = cmdList.getElement(0).getNode().(StringLiteral).getText() and
165+
(cmd = "tar" or cmd.matches("%/tar"))
166+
) and
167+
// At least one extraction-related flag must be present:
168+
// single-dash flags containing 'x' (like -x, -xf, -xvf) or the long option --extract
169+
exists(string flag |
170+
flag = cmdList.getElement(_).getNode().(StringLiteral).getText() and
171+
(flag.regexpMatch("-[a-zA-Z]*x[a-zA-Z]*") or flag = "--extract")
172+
) and
173+
// The sink is the specific non-literal argument (the archive filename)
174+
i > 0 and
175+
not cmdList.getElement(i).getNode() instanceof StringLiteral and
176+
this.asCfgNode() = cmdList.getElement(i)
177+
)
178+
}
206179
}
207180

208181
/**

python/ql/test/query-tests/Security/CWE-022-TarSlip/TarSlip.expected

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -53,9 +53,6 @@ nodes
5353
| tarslip.py:112:1:112:3 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
5454
| tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
5555
| tarslip.py:113:24:113:26 | ControlFlowNode for tar | semmle.label | ControlFlowNode for tar |
56-
| tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
57-
| tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
58-
| tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | semmle.label | ControlFlowNode for Attribute() |
5956
subpaths
6057
#select
6158
| tarslip.py:15:1:15:3 | ControlFlowNode for tar | tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | tarslip.py:15:1:15:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:14:7:14:39 | ControlFlowNode for Attribute() | potentially untrusted source |
@@ -67,6 +64,3 @@ subpaths
6764
| tarslip.py:96:17:96:21 | ControlFlowNode for entry | tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | tarslip.py:96:17:96:21 | ControlFlowNode for entry | This file extraction depends on a $@. | tarslip.py:94:7:94:39 | ControlFlowNode for Attribute() | potentially untrusted source |
6865
| tarslip.py:110:1:110:3 | ControlFlowNode for tar | tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | tarslip.py:110:1:110:3 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:109:7:109:39 | ControlFlowNode for Attribute() | potentially untrusted source |
6966
| tarslip.py:113:24:113:26 | ControlFlowNode for tar | tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | tarslip.py:113:24:113:26 | ControlFlowNode for tar | This file extraction depends on a $@. | tarslip.py:112:7:112:39 | ControlFlowNode for Attribute() | potentially untrusted source |
70-
| tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | This file extraction depends on a $@. | tarslip.py:122:1:122:49 | ControlFlowNode for Attribute() | potentially untrusted source |
71-
| tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | This file extraction depends on a $@. | tarslip.py:126:1:126:51 | ControlFlowNode for Attribute() | potentially untrusted source |
72-
| tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | This file extraction depends on a $@. | tarslip.py:127:1:127:58 | ControlFlowNode for Attribute() | potentially untrusted source |

0 commit comments

Comments
 (0)