Merge pull request #20392 from joefarebrother/python-qual-file-not-closed

Python: Improve File Not Closed query to reduce false positives and provide clearer alerts
This commit is contained in:
Joe Farebrother
2025-09-18 09:33:08 +01:00
committed by GitHub
7 changed files with 92 additions and 63 deletions

View File

@@ -15,12 +15,14 @@
import python
import FileNotAlwaysClosedQuery
import codeql.util.Option
from FileOpen fo, string msg
from FileOpen fo, string msg, LocatableOption<Location, DataFlow::Node>::Option exec
where
fileNotClosed(fo) and
msg = "File is opened but is not closed."
msg = "File is opened but is not closed." and
exec.isNone()
or
fileMayNotBeClosedOnException(fo, _) and
msg = "File may not be closed if an exception is raised."
select fo, msg
fileMayNotBeClosedOnException(fo, exec.asSome()) and
msg = "File may not be closed if $@ raises an exception."
select fo, msg, exec, "this operation"

View File

@@ -34,6 +34,8 @@ class FileWrapperCall extends DataFlow::CallCfgNode {
DataFlow::Node wrapped;
FileWrapperCall() {
// Approximation: Treat any passing of a file object to a class constructor as potentially a wrapper
// This could be made more precise by checking that the constructor writes the file to a field.
wrapped = this.getArg(_).getALocalSource() and
this.getFunction() = classTracker(_)
or
@@ -50,22 +52,15 @@ class FileWrapperCall extends DataFlow::CallCfgNode {
/** A node where a file is closed. */
abstract class FileClose extends DataFlow::CfgNode {
/** Holds if this file close will occur if an exception is raised at `raises`. */
predicate guardsExceptions(DataFlow::CfgNode raises) {
/** Holds if this file close will occur if an exception is raised at `fileRaises`. */
predicate guardsExceptions(DataFlow::CfgNode fileRaises) {
// The close call occurs after an exception edge in the cfg (a catch or finally)
bbReachableRefl(raises.asCfgNode().getBasicBlock().getAnExceptionalSuccessor(),
bbReachableRefl(fileRaises.asCfgNode().getBasicBlock().getAnExceptionalSuccessor(),
this.asCfgNode().getBasicBlock())
or
// The exception is after the close call.
// A full cfg reachability check is not in general feasible for performance, so we approximate it with:
// - A basic block reachability check (here) that works if the expression and close call are in different basic blocks
// - A check (in the `WithStatement` override of `guardsExceptions`) for the case where the exception call
// is lexically contained in the body of a `with` statement that closes the file.
// This may cause FPs in a case such as:
// f.close()
// f.write("...")
// We presume this to not be very common.
bbReachableStrict(this.asCfgNode().getBasicBlock(), raises.asCfgNode().getBasicBlock())
// A full cfg reachability check is not feasible for performance, instead we use local dataflow
fileLocalFlow(this, fileRaises)
}
}
@@ -94,11 +89,10 @@ class WithStatement extends FileClose {
WithStatement() { this.asExpr() = w.getContextExpr() }
override predicate guardsExceptions(DataFlow::CfgNode raises) {
super.guardsExceptions(raises)
override predicate guardsExceptions(DataFlow::CfgNode fileRaises) {
super.guardsExceptions(fileRaises)
or
// Check whether the exception is raised in the body of the with statement.
raises.asExpr().getParent*() = w.getBody().getAnItem()
w.getBody().contains(fileRaises.asExpr())
}
}
@@ -131,7 +125,7 @@ private predicate fileLocalFlowHelper1(
/** Holds if data flows from `source` to `sink`, including file wrapper classes. */
pragma[inline]
private predicate fileLocalFlow(FileOpen source, DataFlow::Node sink) {
private predicate fileLocalFlow(DataFlow::Node source, DataFlow::Node sink) {
exists(DataFlow::LocalSourceNode mid | fileLocalFlowHelper1(source, mid) and mid.flowsTo(sink))
}
@@ -171,7 +165,7 @@ predicate fileMayNotBeClosedOnException(FileOpen fo, DataFlow::Node raises) {
fileLocalFlow(fo, fileRaised) and
not exists(FileClose fc |
fileLocalFlow(fo, fc) and
fc.guardsExceptions(raises)
fc.guardsExceptions(fileRaised)
)
)
}

View File

@@ -0,0 +1,9 @@
| resources_test.py:4:10:4:25 | ControlFlowNode for open() | File may not be closed if $@ raises an exception. | resources_test.py:5:5:5:33 | ControlFlowNode for Attribute() | this operation |
| resources_test.py:9:10:9:25 | ControlFlowNode for open() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation |
| resources_test.py:108:11:108:20 | ControlFlowNode for open() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation |
| resources_test.py:112:11:112:28 | ControlFlowNode for opener_func2() | File may not be closed if $@ raises an exception. | resources_test.py:113:5:113:22 | ControlFlowNode for Attribute() | this operation |
| resources_test.py:123:11:123:24 | ControlFlowNode for opener_func2() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation |
| resources_test.py:129:15:129:24 | ControlFlowNode for open() | File may not be closed if $@ raises an exception. | resources_test.py:130:9:130:26 | ControlFlowNode for Attribute() | this operation |
| resources_test.py:248:11:248:25 | ControlFlowNode for open() | File is opened but is not closed. | file://:0:0:0:0 | (none) | this operation |
| resources_test.py:269:10:269:27 | ControlFlowNode for Attribute() | File may not be closed if $@ raises an exception. | resources_test.py:271:5:271:19 | ControlFlowNode for Attribute() | this operation |
| resources_test.py:285:11:285:20 | ControlFlowNode for open() | File may not be closed if $@ raises an exception. | resources_test.py:287:5:287:31 | ControlFlowNode for Attribute() | this operation |

View File

@@ -0,0 +1,2 @@
query: Resources/FileNotAlwaysClosed.ql
postprocess: utils/test/InlineExpectationsTestQuery.ql

View File

@@ -1,12 +1,12 @@
#File not always closed
def not_close1():
f1 = open("filename") # $ notClosedOnException
f1 = open("filename") # $ Alert # not closed on exception
f1.write("Error could occur")
f1.close()
def not_close2():
f2 = open("filename") # $ notClosed
f2 = open("filename") # $ Alert
def closed3():
f3 = open("filename")
@@ -46,7 +46,7 @@ def closed7():
def not_closed8():
f8 = None
try:
f8 = open("filename") # $ MISSING:notClosedOnException
f8 = open("filename") # $ MISSING:Alert # not closed on exception
f8.write("Error could occur")
finally:
if f8 is None: # We don't precisely consider this condition, so this result is MISSING. However, this seems uncommon.
@@ -105,11 +105,11 @@ def opener_func2(name):
return t1
def not_closed13(name):
f13 = open(name) # $ notClosed
f13 = open(name) # $ Alert
f13.write("Hello")
def may_not_be_closed14(name):
f14 = opener_func2(name) # $ notClosedOnException
f14 = opener_func2(name) # $ Alert # not closed on exception
f14.write("Hello")
f14.close()
@@ -120,13 +120,13 @@ def closer2(t3):
closer1(t3)
def closed15():
f15 = opener_func2() # $ SPURIOUS:notClosed
f15 = opener_func2() # $ SPURIOUS:Alert
closer2(f15) # We don't detect that this call closes the file, so this result is SPURIOUS.
def may_not_be_closed16(name):
try:
f16 = open(name) # $ notClosedOnException
f16 = open(name) # $ Alert # not closed on exception
f16.write("Hello")
f16.close()
except IOError:
@@ -138,7 +138,7 @@ def may_raise():
#Not handling all exceptions, but we'll tolerate the false negative
def not_closed17():
f17 = open("filename") # $ MISSING:notClosedOnException
f17 = open("filename") # $ MISSING:Alert # not closed on exception
try:
f17.write("IOError could occur")
f17.write("IOError could occur")
@@ -151,11 +151,11 @@ def not_closed17():
#With statement will close the fp
def closed18(path):
try:
f18 = open(path)
f18 = open(path)
except IOError as ex:
print(ex)
raise ex
with f18:
with f18:
f18.read()
class Closed19(object):
@@ -234,7 +234,7 @@ def closed21(path):
def not_closed22(path):
f22 = open(path, "wb") # $ MISSING:notClosedOnException
f22 = open(path, "wb") # $ MISSING:Alert # not closed on exception
try:
f22.write(b"foo")
may_raise()
@@ -245,7 +245,7 @@ def not_closed22(path):
f22.close()
def not_closed23(path):
f23 = open(path, "w") # $ notClosed
f23 = open(path, "w") # $ Alert
wr = FileWrapper(f23)
def closed24(path):
@@ -266,7 +266,7 @@ def closed26(path):
os.close(fd)
def not_closed27(path):
fd = os.open(path, "w") # $notClosedOnException
fd = os.open(path, "w") # $Alert # not closed on exception
f27 = os.fdopen(fd, "w")
f27.write("hi")
f27.close()
@@ -282,6 +282,53 @@ def closed28(path):
def closed29(path):
# Due to an approximation in CFG reachability for performance, it is not detected that the `write` call that may raise occurs after the file has already been closed.
# We presume this case to be uncommon.
f28 = open(path) # $SPURIOUS:notClosedOnException
f28 = open(path) # $SPURIOUS:Alert # not closed on exception
f28.close()
f28.write("already closed")
f28.write("already closed")
# False positive in a previous implementation:
class NotWrapper:
def __init__(self, fp):
self.data = fp.read()
fp.close()
def do_something(self):
pass
def closed30(path):
# Combination of approximations resulted in this FP:
# - NotWrapper is treated as a wrapper class as a file handle is passed to it
# - thing.do_something() is treated as a call that can raise an exception while a file is open
# - this call is treated as occurring after the open but not as being guarded by the with statement, as it is in the same basic block
# - - this behavior has been changed fixing the FP
with open(path) as fp: # No longer spurious alert here.
thing = NotWrapper(fp)
thing.do_something()
def closed31(path):
with open(path) as fp:
data = fp.readline()
data2 = fp.readline()
class Wrapper():
def __init__(self, f):
self.f = f
def read(self):
return self.f.read()
def __enter__(self):
pass
def __exit__(self,exc_type, exc_value,traceback):
self.f.close()
def closed32(path):
with open(path, "rb") as f: # No longer spurious alert here.
wrap = Wrapper(f)
# This resulted in an FP in a previous implementation,
# due to a check that an operation is lexically contained within a `with` block (with `expr.getParent*()`)
# not detecting this case.
return list(wrap.read())

View File

@@ -1,25 +0,0 @@
import python
import Resources.FileNotAlwaysClosedQuery
import utils.test.InlineExpectationsTest
module MethodArgTest implements TestSig {
string getARelevantTag() { result = ["notClosed", "notClosedOnException"] }
predicate hasActualResult(Location location, string element, string tag, string value) {
exists(DataFlow::CfgNode el, FileOpen fo |
el = fo and
element = el.toString() and
location = el.getLocation() and
value = "" and
(
fileNotClosed(fo) and
tag = "notClosed"
or
fileMayNotBeClosedOnException(fo, _) and
tag = "notClosedOnException"
)
)
}
}
import MakeTest<MethodArgTest>