Python: Remove code duplication library

This commit is contained in:
Rasmus Wriedt Larsen
2021-03-25 15:27:55 +01:00
parent 09fbf480db
commit bd4934380a
30 changed files with 13 additions and 1207 deletions

View File

@@ -12,15 +12,7 @@
*/
import python
import external.CodeDuplication
from File f, int n
where
n =
count(int line |
exists(DuplicateBlock d | d.sourceFile() = f |
line in [d.sourceStartLine() .. d.sourceEndLine()] and
not allowlistedLineForDuplication(f, line)
)
)
where none()
select f, n order by n desc

View File

@@ -12,15 +12,7 @@
*/
import python
import external.CodeDuplication
from File f, int n
where
n =
count(int line |
exists(SimilarBlock d | d.sourceFile() = f |
line in [d.sourceStartLine() .. d.sourceEndLine()] and
not allowlistedLineForDuplication(f, line)
)
)
where none()
select f, n order by n desc

View File

@@ -1,273 +0,0 @@
/** Provides classes for detecting duplicate or similar code. */
import python
/** Gets the relative path of `file`, with backslashes replaced by forward slashes. */
private string relativePath(File file) { result = file.getRelativePath().replaceAll("\\", "/") }
/**
* Holds if the `index`-th token of block `copy` is in file `file`, spanning
* column `sc` of line `sl` to column `ec` of line `el`.
*
* The four integers are passed positionally, unchanged, to the `tokens` relation.
*
* NOTE(review): `Copy.tokenEndingAt` passes the end line in the position named `ec`
* and the end column in the position named `el`, and `Copy.sourceEndLine` reads the
* end line from that same position of `tokens`. The names `ec` and `el` therefore
* appear to be swapped relative to what they hold; confirm against the `tokens`
* relation in the database schema.
*
* For more information, see [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
*/
pragma[noinline, nomagic]
private predicate tokenLocation(File file, int sl, int sc, int ec, int el, Copy copy, int index) {
file = copy.sourceFile() and
tokens(copy, index, sl, sc, ec, el)
}
/** A token block used for detection of duplicate and similar code. */
class Copy extends @duplication_or_similarity {
/** Gets the largest token index recorded for this block in the `tokens` relation. */
private int lastToken() { result = max(int i | tokens(this, i, _, _, _, _) | i) }
/** Gets the index of the token in this block starting at the location `loc`, if any. */
int tokenStartingAt(Location loc) {
tokenLocation(loc.getFile(), loc.getStartLine(), loc.getStartColumn(), _, _, this, result)
}
/** Gets the index of the token in this block ending at the location `loc`, if any. */
int tokenEndingAt(Location loc) {
tokenLocation(loc.getFile(), _, _, loc.getEndLine(), loc.getEndColumn(), this, result)
}
/** Gets the line on which the first token in this block starts. */
int sourceStartLine() { tokens(this, 0, result, _, _, _) }
/** Gets the column on which the first token in this block starts. */
int sourceStartColumn() { tokens(this, 0, _, result, _, _) }
/** Gets the line on which the last token in this block ends. */
int sourceEndLine() { tokens(this, this.lastToken(), _, _, result, _) }
/** Gets the column on which the last token in this block ends. */
int sourceEndColumn() { tokens(this, this.lastToken(), _, _, _, result) }
/**
* Gets the number of lines spanned by this block, computed as the inclusive range
* from the start line of the first token to the end line of the last token.
*/
int sourceLines() { result = this.sourceEndLine() + 1 - this.sourceStartLine() }
/** Gets an opaque identifier for the equivalence class of this block. */
int getEquivalenceClass() { duplicateCode(this, _, result) or similarCode(this, _, result) }
/**
* Gets the source file in which this block appears.
*
* The file is found by comparing the name recorded in `duplicateCode` or `similarCode`
* with the file's relative path, after replacing backslashes with forward slashes in both.
*/
File sourceFile() {
exists(string name | duplicateCode(this, name, _) or similarCode(this, name, _) |
name.replaceAll("\\", "/") = relativePath(result)
)
}
/**
* Holds if this element is at the specified location.
* The location spans column `startcolumn` of line `startline` to
* column `endcolumn` of line `endline` in file `filepath`.
* For more information, see
* [Locations](https://help.semmle.com/QL/learn-ql/ql/locations.html).
*/
predicate hasLocationInfo(
string filepath, int startline, int startcolumn, int endline, int endcolumn
) {
sourceFile().getAbsolutePath() = filepath and
startline = sourceStartLine() and
startcolumn = sourceStartColumn() and
endline = sourceEndLine() and
endcolumn = sourceEndColumn()
}
/** Gets a textual representation of this element. */
string toString() { result = "Copy" }
/**
* Gets a block that extends this one, that is, its first token (index 0) has the
* same location as some token of this block, but they are not the same block.
*/
Copy extendingBlock() {
exists(File file, int sl, int sc, int ec, int el |
tokenLocation(file, sl, sc, ec, el, this, _) and
tokenLocation(file, sl, sc, ec, el, result, 0)
) and
this != result
}
}
/**
* Holds if there is a sequence of `SimilarBlock`s `start1, ..., ext1` and another sequence
* `start2, ..., ext2` such that each block extends the previous one and corresponding blocks
* have the same equivalence class, with `start` being the equivalence class of `start1` and
* `start2`, and `ext` the equivalence class of `ext1` and `ext2`.
*
* `start1` and `start2` are required to be different blocks. In the base case both
* sequences have length one, that is, `ext1 = start1` and `ext2 = start2`.
*/
predicate similar_extension(
SimilarBlock start1, SimilarBlock start2, SimilarBlock ext1, SimilarBlock ext2, int start, int ext
) {
start1.getEquivalenceClass() = start and
start2.getEquivalenceClass() = start and
ext1.getEquivalenceClass() = ext and
ext2.getEquivalenceClass() = ext and
start1 != start2 and
(
ext1 = start1 and ext2 = start2
or
similar_extension(start1.extendingBlock(), start2.extendingBlock(), ext1, ext2, _, ext)
)
}
/**
* Holds if there is a sequence of `DuplicateBlock`s `start1, ..., ext1` and another sequence
* `start2, ..., ext2` such that each block extends the previous one and corresponding blocks
* have the same equivalence class, with `start` being the equivalence class of `start1` and
* `start2`, and `ext` the equivalence class of `ext1` and `ext2`.
*
* `start1` and `start2` are required to be different blocks. In the base case both
* sequences have length one, that is, `ext1 = start1` and `ext2 = start2`.
*/
predicate duplicate_extension(
DuplicateBlock start1, DuplicateBlock start2, DuplicateBlock ext1, DuplicateBlock ext2, int start,
int ext
) {
start1.getEquivalenceClass() = start and
start2.getEquivalenceClass() = start and
ext1.getEquivalenceClass() = ext and
ext2.getEquivalenceClass() = ext and
start1 != start2 and
(
ext1 = start1 and ext2 = start2
or
duplicate_extension(start1.extendingBlock(), start2.extendingBlock(), ext1, ext2, _, ext)
)
}
/** A block of duplicated code. */
class DuplicateBlock extends Copy, @duplication {
/** Gets a textual representation of this block, including the number of lines it spans. */
override string toString() { result = "Duplicate code: " + sourceLines() + " duplicated lines." }
}
/** A block of similar code. */
class SimilarBlock extends Copy, @similarity {
/** Gets a textual representation of this block, including the number of lines it spans. */
override string toString() {
result = "Similar code: " + sourceLines() + " almost duplicated lines."
}
}
/**
* Holds if `stmt1` and `stmt2` are duplicate statements in function or toplevel `scope1` and
* `scope2`, respectively, where `scope1` and `scope2` are not the same.
*
* Two distinct statements count as duplicates when `duplicateCoversStatement` holds for
* both with the same equivalence classes and the same first and last token indices.
*/
predicate duplicateStatement(Scope scope1, Scope scope2, Stmt stmt1, Stmt stmt2) {
exists(int equivstart, int equivend, int first, int last |
scope1.contains(stmt1) and
scope2.contains(stmt2) and
duplicateCoversStatement(equivstart, equivend, first, last, stmt1) and
duplicateCoversStatement(equivstart, equivend, first, last, stmt2) and
stmt1 != stmt2 and
scope1 != scope2
)
}
/**
* Holds if statement `stmt` is covered by a sequence of `DuplicateBlock`s, where `first`
* is the index of the token in the first block that starts at the beginning of `stmt`,
* while `last` is the index of the token in the last block that ends at the end of `stmt`,
* and `equivstart` and `equivend` are the equivalence classes of the first and the last
* block, respectively.
*/
private predicate duplicateCoversStatement(
int equivstart, int equivend, int first, int last, Stmt stmt
) {
exists(DuplicateBlock b1, DuplicateBlock b2, Location startloc, Location endloc |
stmt.getLocation() = startloc and
// The end of `stmt` is taken from the location of `stmt.getLastStatement()`.
stmt.getLastStatement().getLocation() = endloc and
first = b1.tokenStartingAt(startloc) and
last = b2.tokenEndingAt(endloc) and
b1.getEquivalenceClass() = equivstart and
b2.getEquivalenceClass() = equivend and
// `b2` must be reachable from `b1` through a chain of extending blocks that is
// mirrored by another chain with the same equivalence classes.
duplicate_extension(b1, _, b2, _, equivstart, equivend)
)
}
/**
* Holds if `scope1` is a function or toplevel containing `total` statements, and `scope2`
* is a function or toplevel such that `duplicate` of the statements in `scope1` have a
* duplicate in `scope2`.
*
* Both counts use `strictcount`, so this predicate does not hold when no statement of
* `scope1` is duplicated in `scope2`.
*/
predicate duplicateStatements(Scope scope1, Scope scope2, int duplicate, int total) {
duplicate = strictcount(Stmt stmt | duplicateStatement(scope1, scope2, stmt, _)) and
total = strictcount(Stmt stmt | scope1.contains(stmt))
}
/**
* Holds if scopes `s` and `other` are identical or almost identical, that is, at least
* 80% of the statements in `s` are duplicated in `other`.
*
* `percent` is the percentage of statements in `s` that are duplicated in `other`, and
* `message` is an alert message about `s` containing a `$@` placeholder.
*/
predicate duplicateScopes(Scope s, Scope other, float percent, string message) {
exists(int total, int duplicate | duplicateStatements(s, other, duplicate, total) |
percent = 100.0 * duplicate / total and
percent >= 80.0 and
if duplicate = total
then message = "All " + total + " statements in " + s.getName() + " are identical in $@."
else
message =
duplicate + " out of " + total + " statements in " + s.getName() + " are duplicated in $@."
)
}
/**
* Holds if `stmt1` and `stmt2` are similar statements in function or toplevel `scope1` and `scope2`,
* respectively, where `scope1` and `scope2` are not the same.
*
* Two distinct statements count as similar when `similarCoversStatement` holds for
* both with the same equivalence classes and the same first and last token indices.
*/
private predicate similarStatement(Scope scope1, Scope scope2, Stmt stmt1, Stmt stmt2) {
exists(int start, int end, int first, int last |
scope1.contains(stmt1) and
scope2.contains(stmt2) and
similarCoversStatement(start, end, first, last, stmt1) and
similarCoversStatement(start, end, first, last, stmt2) and
stmt1 != stmt2 and
scope1 != scope2
)
}
/**
* Holds if statement `stmt` is covered by a sequence of `SimilarBlock`s, where `first`
* is the index of the token in the first block that starts at the beginning of `stmt`,
* while `last` is the index of the token in the last block that ends at the end of `stmt`,
* and `equivstart` and `equivend` are the equivalence classes of the first and the last
* block, respectively.
*/
private predicate similarCoversStatement(
int equivstart, int equivend, int first, int last, Stmt stmt
) {
exists(SimilarBlock b1, SimilarBlock b2, Location startloc, Location endloc |
stmt.getLocation() = startloc and
// The end of `stmt` is taken from the location of `stmt.getLastStatement()`.
stmt.getLastStatement().getLocation() = endloc and
first = b1.tokenStartingAt(startloc) and
last = b2.tokenEndingAt(endloc) and
b1.getEquivalenceClass() = equivstart and
b2.getEquivalenceClass() = equivend and
// `b2` must be reachable from `b1` through a chain of extending blocks that is
// mirrored by another chain with the same equivalence classes.
similar_extension(b1, _, b2, _, equivstart, equivend)
)
}
/**
* Holds if `scope1` is a function or toplevel containing `total` statements, and `scope2`
* is a function or toplevel such that `similar` of the statements in `scope1` have a
* similar statement in `scope2`.
*
* Both counts use `strictcount`, so this predicate does not hold when no statement of
* `scope1` is similar to a statement in `scope2`.
*/
private predicate similarStatements(Scope scope1, Scope scope2, int similar, int total) {
similar = strictcount(Stmt stmt | similarStatement(scope1, scope2, stmt, _)) and
total = strictcount(Stmt stmt | scope1.contains(stmt))
}
/**
* Holds if scopes `s` and `other` are similar, that is, at least 80% of the statements
* in `s` are similar to statements in `other`.
*
* `percent` is the percentage of statements in `s` that are similar to statements in
* `other`, and `message` is an alert message about `s` containing a `$@` placeholder.
*/
predicate similarScopes(Scope s, Scope other, float percent, string message) {
exists(int total, int similar | similarStatements(s, other, similar, total) |
percent = 100.0 * similar / total and
percent >= 80.0 and
if similar = total
then message = "All statements in " + s.getName() + " are similar in $@."
else
message =
similar + " out of " + total + " statements in " + s.getName() + " are similar in $@."
)
}
/**
* Holds if line `line` of file `f` is acceptable as a duplicate.
* This is the case when an import statement (`ImportingStmt`) in `f` starts on that line.
*/
predicate allowlistedLineForDuplication(File f, int line) {
exists(ImportingStmt i | i.getLocation().getFile() = f and i.getLocation().getStartLine() = line)
}

View File

@@ -16,19 +16,7 @@
*/
import python
import CodeDuplication
predicate sorted_by_location(DuplicateBlock x, DuplicateBlock y) {
if x.sourceFile() = y.sourceFile()
then x.sourceStartLine() < y.sourceStartLine()
else x.sourceFile().getAbsolutePath() < y.sourceFile().getAbsolutePath()
}
from DuplicateBlock d, DuplicateBlock other
where
d.sourceLines() > 10 and
other.getEquivalenceClass() = d.getEquivalenceClass() and
sorted_by_location(other, d)
select d,
"Duplicate code: " + d.sourceLines() + " lines are duplicated at " +
other.sourceFile().getShortName() + ":" + other.sourceStartLine().toString()
from BasicBlock d
where none()
select d, "Duplicate code: " + "-1" + " lines are duplicated at " + "<file>" + ":" + "-1"

View File

@@ -16,15 +16,7 @@
*/
import python
import CodeDuplication
predicate relevant(Function m) { m.getMetrics().getNumberOfLinesOfCode() > 5 }
from Function m, Function other, string message, int percent
where
duplicateScopes(m, other, percent, message) and
relevant(m) and
percent > 95.0 and
not duplicateScopes(m.getEnclosingModule(), other.getEnclosingModule(), _, _) and
not duplicateScopes(m.getScope(), other.getScope(), _, _)
from Function m, Function other, string message
where none()
select m, message, other, other.getName()

View File

@@ -16,11 +16,7 @@
*/
import python
import CodeDuplication
from Class c, Class other, string message
where
duplicateScopes(c, other, _, message) and
count(c.getAStmt()) > 3 and
not duplicateScopes(c.getEnclosingModule(), _, _, _)
where none()
select c, message, other, other.getName()

View File

@@ -16,8 +16,7 @@
*/
import python
import CodeDuplication
from Module m, Module other, int percent, string message
where duplicateScopes(m, other, percent, message)
from Module m, Module other, string message
where none()
select m, message, other, other.getName()

View File

@@ -16,8 +16,7 @@
*/
import python
import CodeDuplication
from Module m, Module other, string message
where similarScopes(m, other, _, message)
where none()
select m, message, other, other.getName()

View File

@@ -16,16 +16,7 @@
*/
import python
import CodeDuplication
predicate relevant(Function m) { m.getMetrics().getNumberOfLinesOfCode() > 10 }
from Function m, Function other, string message, int percent
where
similarScopes(m, other, percent, message) and
relevant(m) and
percent > 95.0 and
not duplicateScopes(m, other, _, _) and
not duplicateScopes(m.getEnclosingModule(), other.getEnclosingModule(), _, _) and
not duplicateScopes(m.getScope(), other.getScope(), _, _)
from Function m, Function other, string message
where none()
select m, message, other, other.getName()

View File

@@ -1,2 +0,0 @@
| Duplicate code: 34 duplicated lines. | Duplicate code: 34 duplicated lines. | duplicate_test.py | 9 | 42 |
| Duplicate code: 80 duplicated lines. | Duplicate code: 80 duplicated lines. | duplicate_test.py | 84 | 163 |

View File

@@ -1,23 +0,0 @@
/**
* @name Duplicate
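* @description Lists pairs of duplicate blocks that share an equivalence class, with the first block ordered before the second by file path and start line.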
* @kind table
* @problem.severity warning
*/
import python
import external.CodeDuplication
predicate lexically_sorted(DuplicateBlock dup1, DuplicateBlock dup2) {
dup1.sourceFile().getAbsolutePath() < dup2.sourceFile().getAbsolutePath()
or
dup1.sourceFile().getAbsolutePath() = dup2.sourceFile().getAbsolutePath() and
dup1.sourceStartLine() < dup2.sourceStartLine()
}
from DuplicateBlock dup1, DuplicateBlock dup2
where
dup1.getEquivalenceClass() = dup2.getEquivalenceClass() and
lexically_sorted(dup1, dup2)
select dup1.toString(), dup2.toString(), dup1.sourceFile().getShortName(), dup1.sourceStartLine(),
dup1.sourceEndLine()

View File

@@ -1,26 +0,0 @@
/**
* @name DuplicateStatements
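* @description Lists statements that are not themselves duplicated but belong to a function in which more than 80% of the statements are duplicated in another function.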
* @kind problem
* @problem.severity warning
*/
import python
import external.CodeDuplication
predicate mostlyDuplicateFunction(Function f) {
exists(int covered, int total, Function other, int percent |
duplicateStatements(f, other, covered, total) and
covered != total and
total > 5 and
covered * 100 / total = percent and
percent > 80 and
not exists(Scope s | s = f.getScope*() | duplicateScopes(s, _, _, _))
)
}
from Stmt s
where
mostlyDuplicateFunction(s.getScope()) and
not duplicateStatement(s.getScope(), _, s, _)
select s.toString(), s.getLocation().toString()

View File

@@ -1,23 +0,0 @@
| duplicate_test.py:9:1:20:17 | Similar code: 12 almost duplicated lines. | duplicate_test.py:47:1:58:17 | Similar code: 12 almost duplicated lines. | duplicate_test.py | 9 | 20 |
| duplicate_test.py:9:1:20:17 | Similar code: 12 almost duplicated lines. | duplicate_test.py:249:1:260:17 | Similar code: 12 almost duplicated lines. | duplicate_test.py | 9 | 20 |
| duplicate_test.py:9:1:20:17 | Similar code: 12 almost duplicated lines. | duplicate_test.py:287:1:298:17 | Similar code: 12 almost duplicated lines. | duplicate_test.py | 9 | 20 |
| duplicate_test.py:14:8:25:13 | Similar code: 12 almost duplicated lines. | duplicate_test.py:52:8:63:13 | Similar code: 12 almost duplicated lines. | duplicate_test.py | 14 | 25 |
| duplicate_test.py:14:8:25:13 | Similar code: 12 almost duplicated lines. | duplicate_test.py:254:8:265:13 | Similar code: 12 almost duplicated lines. | duplicate_test.py | 14 | 25 |
| duplicate_test.py:20:28:42:31 | Similar code: 23 almost duplicated lines. | duplicate_test.py:58:28:80:31 | Similar code: 23 almost duplicated lines. | duplicate_test.py | 20 | 42 |
| duplicate_test.py:20:28:42:31 | Similar code: 23 almost duplicated lines. | duplicate_test.py:260:28:282:31 | Similar code: 23 almost duplicated lines. | duplicate_test.py | 20 | 42 |
| duplicate_test.py:20:28:42:31 | Similar code: 23 almost duplicated lines. | duplicate_test.py:296:40:318:31 | Similar code: 23 almost duplicated lines. | duplicate_test.py | 20 | 42 |
| duplicate_test.py:36:1:47:0 | Similar code: 12 almost duplicated lines. | duplicate_test.py:74:1:84:0 | Similar code: 11 almost duplicated lines. | duplicate_test.py | 36 | 47 |
| duplicate_test.py:36:1:47:0 | Similar code: 12 almost duplicated lines. | duplicate_test.py:276:1:287:0 | Similar code: 12 almost duplicated lines. | duplicate_test.py | 36 | 47 |
| duplicate_test.py:36:22:56:26 | Similar code: 21 almost duplicated lines. | duplicate_test.py:276:21:296:26 | Similar code: 21 almost duplicated lines. | duplicate_test.py | 36 | 56 |
| duplicate_test.py:42:22:57:9 | Similar code: 16 almost duplicated lines. | duplicate_test.py:245:20:259:9 | Similar code: 15 almost duplicated lines. | duplicate_test.py | 42 | 57 |
| duplicate_test.py:42:22:57:9 | Similar code: 16 almost duplicated lines. | duplicate_test.py:282:22:297:9 | Similar code: 16 almost duplicated lines. | duplicate_test.py | 42 | 57 |
| duplicate_test.py:47:1:58:17 | Similar code: 12 almost duplicated lines. | duplicate_test.py:249:1:260:17 | Similar code: 12 almost duplicated lines. | duplicate_test.py | 47 | 58 |
| duplicate_test.py:47:1:58:17 | Similar code: 12 almost duplicated lines. | duplicate_test.py:287:1:298:17 | Similar code: 12 almost duplicated lines. | duplicate_test.py | 47 | 58 |
| duplicate_test.py:52:8:63:13 | Similar code: 12 almost duplicated lines. | duplicate_test.py:254:8:265:13 | Similar code: 12 almost duplicated lines. | duplicate_test.py | 52 | 63 |
| duplicate_test.py:58:28:80:31 | Similar code: 23 almost duplicated lines. | duplicate_test.py:260:28:282:31 | Similar code: 23 almost duplicated lines. | duplicate_test.py | 58 | 80 |
| duplicate_test.py:58:28:80:31 | Similar code: 23 almost duplicated lines. | duplicate_test.py:296:40:318:31 | Similar code: 23 almost duplicated lines. | duplicate_test.py | 58 | 80 |
| duplicate_test.py:74:1:84:0 | Similar code: 11 almost duplicated lines. | duplicate_test.py:276:1:287:0 | Similar code: 12 almost duplicated lines. | duplicate_test.py | 74 | 84 |
| duplicate_test.py:82:25:163:24 | Similar code: 82 almost duplicated lines. | duplicate_test.py:163:24:245:24 | Similar code: 83 almost duplicated lines. | duplicate_test.py | 82 | 163 |
| duplicate_test.py:245:20:259:9 | Similar code: 15 almost duplicated lines. | duplicate_test.py:282:22:297:9 | Similar code: 16 almost duplicated lines. | duplicate_test.py | 245 | 259 |
| duplicate_test.py:249:1:260:17 | Similar code: 12 almost duplicated lines. | duplicate_test.py:287:1:298:17 | Similar code: 12 almost duplicated lines. | duplicate_test.py | 249 | 260 |
| duplicate_test.py:260:28:282:31 | Similar code: 23 almost duplicated lines. | duplicate_test.py:296:40:318:31 | Similar code: 23 almost duplicated lines. | duplicate_test.py | 260 | 282 |

View File

@@ -1,22 +0,0 @@
/**
* @name Similar
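* @description Lists pairs of similar blocks that share an equivalence class, with the first block ordered before the second by file path and start line.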
* @kind table
* @problem.severity warning
*/
import python
import external.CodeDuplication
predicate lexically_sorted(SimilarBlock dup1, SimilarBlock dup2) {
dup1.sourceFile().getAbsolutePath() < dup2.sourceFile().getAbsolutePath()
or
dup1.sourceFile().getAbsolutePath() = dup2.sourceFile().getAbsolutePath() and
dup1.sourceStartLine() < dup2.sourceStartLine()
}
from SimilarBlock dup1, SimilarBlock dup2
where
dup1.getEquivalenceClass() = dup2.getEquivalenceClass() and
lexically_sorted(dup1, dup2)
select dup1, dup2, dup1.sourceFile().getShortName(), dup1.sourceStartLine(), dup1.sourceEndLine()

View File

@@ -1,321 +0,0 @@
#Code Duplication
#Exact duplication of function
#Code copied from stdlib, copyright PSF.
#See http://www.python.org/download/releases/2.7/license/
def dis(x=None):
"""Disassemble classes, methods, functions, or code.
With no argument, disassemble the last traceback.
"""
if x is None:
distb()
return
if isinstance(x, types.InstanceType):
x = x.__class__
if hasattr(x, 'im_func'):
x = x.im_func
if hasattr(x, 'func_code'):
x = x.func_code
if hasattr(x, '__dict__'):
items = x.__dict__.items()
items.sort()
for name, x1 in items:
if isinstance(x1, _have_code):
print "Disassembly of %s:" % name
try:
dis(x1)
except TypeError, msg:
print "Sorry:", msg
print
elif hasattr(x, 'co_code'):
disassemble(x)
elif isinstance(x, str):
disassemble_string(x)
else:
raise TypeError, \
"don't know how to disassemble %s objects" % \
type(x).__name__
#And duplicate version
def dis2(x=None):
"""Disassemble classes, methods, functions, or code.
With no argument, disassemble the last traceback.
"""
if x is None:
distb()
return
if isinstance(x, types.InstanceType):
x = x.__class__
if hasattr(x, 'im_func'):
x = x.im_func
if hasattr(x, 'func_code'):
x = x.func_code
if hasattr(x, '__dict__'):
items = x.__dict__.items()
items.sort()
for name, x1 in items:
if isinstance(x1, _have_code):
print "Disassembly of %s:" % name
try:
dis(x1)
except TypeError, msg:
print "Sorry:", msg
print
elif hasattr(x, 'co_code'):
disassemble(x)
elif isinstance(x, str):
disassemble_string(x)
else:
raise TypeError, \
"don't know how to disassemble %s objects" % \
type(x).__name__
#Exactly duplicate class
class Popen3:
"""Class representing a child process. Normally, instances are created
internally by the functions popen2() and popen3()."""
sts = -1 # Child not completed yet
def __init__(self, cmd, capturestderr=False, bufsize=-1):
"""The parameter 'cmd' is the shell command to execute in a
sub-process. On UNIX, 'cmd' may be a sequence, in which case arguments
will be passed directly to the program without shell intervention (as
with os.spawnv()). If 'cmd' is a string it will be passed to the shell
(as with os.system()). The 'capturestderr' flag, if true, specifies
that the object should capture standard error output of the child
process. The default is false. If the 'bufsize' parameter is
specified, it specifies the size of the I/O buffers to/from the child
process."""
_cleanup()
self.cmd = cmd
p2cread, p2cwrite = os.pipe()
c2pread, c2pwrite = os.pipe()
if capturestderr:
errout, errin = os.pipe()
self.pid = os.fork()
if self.pid == 0:
# Child
os.dup2(p2cread, 0)
os.dup2(c2pwrite, 1)
if capturestderr:
os.dup2(errin, 2)
self._run_child(cmd)
os.close(p2cread)
self.tochild = os.fdopen(p2cwrite, 'w', bufsize)
os.close(c2pwrite)
self.fromchild = os.fdopen(c2pread, 'r', bufsize)
if capturestderr:
os.close(errin)
self.childerr = os.fdopen(errout, 'r', bufsize)
else:
self.childerr = None
def __del__(self):
# In case the child hasn't been waited on, check if it's done.
self.poll(_deadstate=sys.maxint)
if self.sts < 0:
if _active is not None:
# Child is still running, keep us alive until we can wait on it.
_active.append(self)
def _run_child(self, cmd):
if isinstance(cmd, basestring):
cmd = ['/bin/sh', '-c', cmd]
os.closerange(3, MAXFD)
try:
os.execvp(cmd[0], cmd)
finally:
os._exit(1)
def poll(self, _deadstate=None):
"""Return the exit status of the child process if it has finished,
or -1 if it hasn't finished yet."""
if self.sts < 0:
try:
pid, sts = os.waitpid(self.pid, os.WNOHANG)
# pid will be 0 if self.pid hasn't terminated
if pid == self.pid:
self.sts = sts
except os.error:
if _deadstate is not None:
self.sts = _deadstate
return self.sts
def wait(self):
"""Wait for and return the exit status of the child process."""
if self.sts < 0:
pid, sts = os.waitpid(self.pid, 0)
# This used to be a test, but it is believed to be
# always true, so I changed it to an assertion - mvl
assert pid == self.pid
self.sts = sts
return self.sts
class Popen3Again:
"""Class representing a child process. Normally, instances are created
internally by the functions popen2() and popen3()."""
sts = -1 # Child not completed yet
def __init__(self, cmd, capturestderr=False, bufsize=-1):
"""The parameter 'cmd' is the shell command to execute in a
sub-process. On UNIX, 'cmd' may be a sequence, in which case arguments
will be passed directly to the program without shell intervention (as
with os.spawnv()). If 'cmd' is a string it will be passed to the shell
(as with os.system()). The 'capturestderr' flag, if true, specifies
that the object should capture standard error output of the child
process. The default is false. If the 'bufsize' parameter is
specified, it specifies the size of the I/O buffers to/from the child
process."""
_cleanup()
self.cmd = cmd
p2cread, p2cwrite = os.pipe()
c2pread, c2pwrite = os.pipe()
if capturestderr:
errout, errin = os.pipe()
self.pid = os.fork()
if self.pid == 0:
# Child
os.dup2(p2cread, 0)
os.dup2(c2pwrite, 1)
if capturestderr:
os.dup2(errin, 2)
self._run_child(cmd)
os.close(p2cread)
self.tochild = os.fdopen(p2cwrite, 'w', bufsize)
os.close(c2pwrite)
self.fromchild = os.fdopen(c2pread, 'r', bufsize)
if capturestderr:
os.close(errin)
self.childerr = os.fdopen(errout, 'r', bufsize)
else:
self.childerr = None
def __del__(self):
# In case the child hasn't been waited on, check if it's done.
self.poll(_deadstate=sys.maxint)
if self.sts < 0:
if _active is not None:
# Child is still running, keep us alive until we can wait on it.
_active.append(self)
def _run_child(self, cmd):
if isinstance(cmd, basestring):
cmd = ['/bin/sh', '-c', cmd]
os.closerange(3, MAXFD)
try:
os.execvp(cmd[0], cmd)
finally:
os._exit(1)
def poll(self, _deadstate=None):
"""Return the exit status of the child process if it has finished,
or -1 if it hasn't finished yet."""
if self.sts < 0:
try:
pid, sts = os.waitpid(self.pid, os.WNOHANG)
# pid will be 0 if self.pid hasn't terminated
if pid == self.pid:
self.sts = sts
except os.error:
if _deadstate is not None:
self.sts = _deadstate
return self.sts
def wait(self):
"""Wait for and return the exit status of the child process."""
if self.sts < 0:
pid, sts = os.waitpid(self.pid, 0)
# This used to be a test, but it is believed to be
# always true, so I changed it to an assertion - mvl
assert pid == self.pid
self.sts = sts
return self.sts
#Duplicate function with identifiers changed
def dis3(y=None):
"""frobnicate classes, methods, functions, or code.
With no argument, frobnicate the last traceback.
"""
if y is None:
distb()
return
if isinstance(y, types.InstanceType):
y = y.__class__
if hasattr(y, 'im_func'):
y = y.im_func
if hasattr(y, 'func_code'):
y = y.func_code
if hasattr(y, '__dict__'):
items = y.__dict__.items()
items.sort()
for name, y1 in items:
if isinstance(y1, _have_code):
print "Disassembly of %s:" % name
try:
dis(y1)
except TypeError, msg:
print "Sorry:", msg
print
elif hasattr(y, 'co_code'):
frobnicate(y)
elif isinstance(y, str):
frobnicate_string(y)
else:
raise TypeError, \
"don't know how to frobnicate %s objects" % \
type(y).__name__
#Mostly similar function with changed identifiers
def dis5(z=None):
"""splat classes, methods, functions, or code.
With no argument, splat the last traceback.
"""
if z is None:
distb()
return
if isinstance(z, types.InstanceType):
z = z.__class__
if hasattr(y, 'func_code'):
y = y.func_code
if hasattr(z, '__dict__'):
items = z.__dict__.items()
items.sort()
for name, z1 in items:
if isinstance(z1, _have_code):
print "Disassembly of %s:" % name
try:
dis(z1)
except TypeError, msg:
print "Sorry:", msg
print
elif hasattr(z, 'co_code'):
splat(z)
elif isinstance(z, str):
splat_string(z)
else:
raise TypeError, \
"don't know how to splat %s objects" % \
type(z).__name__

View File

@@ -1,8 +0,0 @@
| duplicate_test.py:47:9:60:17 | Duplicate code: 14 duplicated lines. | Duplicate code: 14 lines are duplicated at duplicate_test.py:9 |
| duplicate_test.py:56:18:66:25 | Duplicate code: 11 duplicated lines. | Duplicate code: 11 lines are duplicated at duplicate_test.py:18 |
| duplicate_test.py:61:24:80:32 | Duplicate code: 20 duplicated lines. | Duplicate code: 20 lines are duplicated at duplicate_test.py:23 |
| duplicate_test.py:166:18:245:24 | Duplicate code: 80 duplicated lines. | Duplicate code: 80 lines are duplicated at duplicate_test.py:84 |
| duplicate_test.py:287:9:300:17 | Duplicate code: 14 duplicated lines. | Duplicate code: 14 lines are duplicated at duplicate_test.py:9 |
| duplicate_test.py:287:9:300:17 | Duplicate code: 14 duplicated lines. | Duplicate code: 14 lines are duplicated at duplicate_test.py:47 |
| duplicate_test.py:299:22:318:32 | Duplicate code: 20 duplicated lines. | Duplicate code: 20 lines are duplicated at duplicate_test.py:23 |
| duplicate_test.py:299:22:318:32 | Duplicate code: 20 duplicated lines. | Duplicate code: 20 lines are duplicated at duplicate_test.py:61 |

View File

@@ -1 +0,0 @@
external/DuplicateBlock.ql

View File

@@ -1,4 +0,0 @@
| duplicate_test.py:9:1:9:16 | Function dis | All 26 statements in dis are identical in $@. | duplicate_test.py:47:1:47:17 | Function dis2 | dis2 |
| duplicate_test.py:47:1:47:17 | Function dis2 | All 26 statements in dis2 are identical in $@. | duplicate_test.py:9:1:9:16 | Function dis | dis |
| duplicate_test.py:287:1:287:17 | Function dis4 | All 24 statements in dis4 are identical in $@. | duplicate_test.py:9:1:9:16 | Function dis | dis |
| duplicate_test.py:287:1:287:17 | Function dis4 | All 24 statements in dis4 are identical in $@. | duplicate_test.py:47:1:47:17 | Function dis2 | dis2 |

View File

@@ -1 +0,0 @@
external/DuplicateFunction.ql

View File

@@ -1,2 +0,0 @@
| duplicate_test.py:84:1:84:13 | Class Popen3 | All 55 statements in Popen3 are identical in $@. | duplicate_test.py:166:1:166:18 | Class Popen3Again | Popen3Again |
| duplicate_test.py:166:1:166:18 | Class Popen3Again | All 55 statements in Popen3Again are identical in $@. | duplicate_test.py:84:1:84:13 | Class Popen3 | Popen3 |

View File

@@ -1 +0,0 @@
external/MostlyDuplicateClass.ql

View File

@@ -1 +0,0 @@
external/MostlyDuplicateFile.ql

View File

@@ -1 +0,0 @@
external/MostlySimilarFile.ql

View File

@@ -1,12 +0,0 @@
| duplicate_test.py:9:1:9:16 | Function dis | All statements in dis are similar in $@. | duplicate_test.py:249:1:249:17 | Function dis3 | dis3 |
| duplicate_test.py:9:1:9:16 | Function dis | All statements in dis are similar in $@. | duplicate_test.py:323:1:323:17 | Function dis5 | dis5 |
| duplicate_test.py:47:1:47:17 | Function dis2 | All statements in dis2 are similar in $@. | duplicate_test.py:249:1:249:17 | Function dis3 | dis3 |
| duplicate_test.py:47:1:47:17 | Function dis2 | All statements in dis2 are similar in $@. | duplicate_test.py:323:1:323:17 | Function dis5 | dis5 |
| duplicate_test.py:249:1:249:17 | Function dis3 | All statements in dis3 are similar in $@. | duplicate_test.py:9:1:9:16 | Function dis | dis |
| duplicate_test.py:249:1:249:17 | Function dis3 | All statements in dis3 are similar in $@. | duplicate_test.py:47:1:47:17 | Function dis2 | dis2 |
| duplicate_test.py:249:1:249:17 | Function dis3 | All statements in dis3 are similar in $@. | duplicate_test.py:323:1:323:17 | Function dis5 | dis5 |
| duplicate_test.py:287:1:287:17 | Function dis4 | All statements in dis4 are similar in $@. | duplicate_test.py:249:1:249:17 | Function dis3 | dis3 |
| duplicate_test.py:287:1:287:17 | Function dis4 | All statements in dis4 are similar in $@. | duplicate_test.py:323:1:323:17 | Function dis5 | dis5 |
| duplicate_test.py:323:1:323:17 | Function dis5 | All statements in dis5 are similar in $@. | duplicate_test.py:9:1:9:16 | Function dis | dis |
| duplicate_test.py:323:1:323:17 | Function dis5 | All statements in dis5 are similar in $@. | duplicate_test.py:47:1:47:17 | Function dis2 | dis2 |
| duplicate_test.py:323:1:323:17 | Function dis5 | All statements in dis5 are similar in $@. | duplicate_test.py:249:1:249:17 | Function dis3 | dis3 |

View File

@@ -1 +0,0 @@
external/SimilarFunction.ql

View File

@@ -1,358 +0,0 @@
#Code Duplication
#Exact duplication of function
#Code copied from stdlib, copyright PSF.
#See http://www.python.org/download/releases/2.7/license/
def dis(x=None):
    """Disassemble classes, methods, functions, or code.

    With no argument, disassemble the last traceback (delegated to
    ``distb()``).

    ``x`` is first unwrapped: an instance of ``types.InstanceType`` is
    replaced by its class, an object with ``im_func`` by that attribute,
    and an object with ``func_code`` by that attribute.  The result is
    then dispatched on:

    * has ``__dict__``: every member that is an instance of ``_have_code``
      is disassembled (via ``dis()``) in sorted name order; a ``TypeError``
      raised for a member is reported with a "Sorry:" line instead of
      propagating.
    * has ``co_code``: passed to ``disassemble()``.
    * is a ``str``: passed to ``disassemble_string()``.

    Raises:
        TypeError: if ``x`` matches none of the cases above.
    """
    if x is None:
        distb()
        return
    if isinstance(x, types.InstanceType):
        x = x.__class__
    if hasattr(x, 'im_func'):
        x = x.im_func
    if hasattr(x, 'func_code'):
        x = x.func_code
    if hasattr(x, '__dict__'):
        # sorted() accepts both a list and a dict view, unlike list.sort().
        items = sorted(x.__dict__.items())
        for name, x1 in items:
            if isinstance(x1, _have_code):
                print("Disassembly of %s:" % name)
                try:
                    dis(x1)
                # Bind the exception with "as"; "except TypeError(msg)" would
                # evaluate an unbound name while handling the error.
                except TypeError as msg:
                    print("Sorry:", msg)
                print()
    elif hasattr(x, 'co_code'):
        disassemble(x)
    elif isinstance(x, str):
        disassemble_string(x)
    else:
        raise TypeError(
            "don't know how to disassemble %s objects" %
            type(x).__name__)
#And duplicate version
def dis2(x=None):
    """Disassemble classes, methods, functions, or code.

    With no argument, disassemble the last traceback (delegated to
    ``distb()``).

    ``x`` is first unwrapped: an instance of ``types.InstanceType`` is
    replaced by its class, an object with ``im_func`` by that attribute,
    and an object with ``func_code`` by that attribute.  The result is
    then dispatched on:

    * has ``__dict__``: every member that is an instance of ``_have_code``
      is disassembled (via ``dis()``) in sorted name order; a ``TypeError``
      raised for a member is reported with a "Sorry:" line instead of
      propagating.
    * has ``co_code``: passed to ``disassemble()``.
    * is a ``str``: passed to ``disassemble_string()``.

    Raises:
        TypeError: if ``x`` matches none of the cases above.
    """
    if x is None:
        distb()
        return
    if isinstance(x, types.InstanceType):
        x = x.__class__
    if hasattr(x, 'im_func'):
        x = x.im_func
    if hasattr(x, 'func_code'):
        x = x.func_code
    if hasattr(x, '__dict__'):
        # sorted() accepts both a list and a dict view, unlike list.sort().
        items = sorted(x.__dict__.items())
        for name, x1 in items:
            if isinstance(x1, _have_code):
                print("Disassembly of %s:" % name)
                try:
                    dis(x1)
                # Bind the exception with "as"; "except TypeError(msg)" would
                # evaluate an unbound name while handling the error.
                except TypeError as msg:
                    print("Sorry:", msg)
                print()
    elif hasattr(x, 'co_code'):
        disassemble(x)
    elif isinstance(x, str):
        disassemble_string(x)
    else:
        raise TypeError(
            "don't know how to disassemble %s objects" %
            type(x).__name__)
#Exactly duplicate class
class Popen3:
    """Class representing a child process.

    Normally, instances are created internally by the functions popen2()
    and popen3().  ``__init__`` sets ``cmd``, ``pid``, ``tochild``,
    ``fromchild`` and ``childerr``; ``sts`` holds the exit status once one
    has been collected.
    """

    # Negative while no exit status has been recorded for the child.
    sts = -1

    def __init__(self, cmd, capturestderr=False, bufsize=-1):
        """Fork a child running ``cmd`` and connect pipes to it.

        On UNIX, 'cmd' may be a sequence, in which case arguments are
        passed directly to the program without shell intervention (as with
        os.spawnv()); if 'cmd' is a string it is passed to the shell (as
        with os.system()).  When 'capturestderr' is true the child's
        standard error is captured as well (the default is false).
        'bufsize', if given, is the buffer size passed to os.fdopen() for
        the pipes to/from the child.
        """
        _cleanup()
        self.cmd = cmd
        child_stdin, parent_writer = os.pipe()
        parent_reader, child_stdout = os.pipe()
        if capturestderr:
            parent_err_reader, child_stderr = os.pipe()
        self.pid = os.fork()
        if self.pid == 0:
            # Child side: move the pipe ends onto fds 0/1/2, then run cmd.
            os.dup2(child_stdin, 0)
            os.dup2(child_stdout, 1)
            if capturestderr:
                os.dup2(child_stderr, 2)
            self._run_child(cmd)
        # Close the child's pipe ends and wrap the remaining ones as files.
        os.close(child_stdin)
        self.tochild = os.fdopen(parent_writer, 'w', bufsize)
        os.close(child_stdout)
        self.fromchild = os.fdopen(parent_reader, 'r', bufsize)
        if not capturestderr:
            self.childerr = None
        else:
            os.close(child_stderr)
            self.childerr = os.fdopen(parent_err_reader, 'r', bufsize)

    def __del__(self):
        # The child may never have been waited on; see whether it is done.
        self.poll(_deadstate=sys.maxint)
        if self.sts < 0 and _active is not None:
            # Still running: park ourselves in _active so we can be waited
            # on later.
            _active.append(self)

    def _run_child(self, cmd):
        """Replace the current process with ``cmd``; always ends in _exit."""
        # A string command is run through /bin/sh -c.
        argv = ['/bin/sh', '-c', cmd] if isinstance(cmd, basestring) else cmd
        os.closerange(3, MAXFD)
        try:
            os.execvp(argv[0], argv)
        finally:
            os._exit(1)

    def poll(self, _deadstate=None):
        """Return the exit status of the child process if it has finished,
        or -1 if it hasn't finished yet."""
        if not self.sts < 0:
            return self.sts
        try:
            reaped_pid, status = os.waitpid(self.pid, os.WNOHANG)
            # reaped_pid is 0 when self.pid has not terminated yet.
            if reaped_pid == self.pid:
                self.sts = status
        except os.error:
            if _deadstate is not None:
                self.sts = _deadstate
        return self.sts

    def wait(self):
        """Wait for and return the exit status of the child process."""
        if not self.sts < 0:
            return self.sts
        reaped_pid, status = os.waitpid(self.pid, 0)
        # Formerly a test; believed to always hold, hence an assertion.
        assert reaped_pid == self.pid
        self.sts = status
        return self.sts
class Popen3Again:
    """Class representing a child process.

    Normally, instances are created internally by the functions popen2()
    and popen3().  ``__init__`` sets ``cmd``, ``pid``, ``tochild``,
    ``fromchild`` and ``childerr``; ``sts`` holds the exit status once one
    has been collected.
    """

    # Negative while no exit status has been recorded for the child.
    sts = -1

    def __init__(self, cmd, capturestderr=False, bufsize=-1):
        """Fork a child running ``cmd`` and connect pipes to it.

        On UNIX, 'cmd' may be a sequence, in which case arguments are
        passed directly to the program without shell intervention (as with
        os.spawnv()); if 'cmd' is a string it is passed to the shell (as
        with os.system()).  When 'capturestderr' is true the child's
        standard error is captured as well (the default is false).
        'bufsize', if given, is the buffer size passed to os.fdopen() for
        the pipes to/from the child.
        """
        _cleanup()
        self.cmd = cmd
        child_stdin, parent_writer = os.pipe()
        parent_reader, child_stdout = os.pipe()
        if capturestderr:
            parent_err_reader, child_stderr = os.pipe()
        self.pid = os.fork()
        if self.pid == 0:
            # Child side: move the pipe ends onto fds 0/1/2, then run cmd.
            os.dup2(child_stdin, 0)
            os.dup2(child_stdout, 1)
            if capturestderr:
                os.dup2(child_stderr, 2)
            self._run_child(cmd)
        # Close the child's pipe ends and wrap the remaining ones as files.
        os.close(child_stdin)
        self.tochild = os.fdopen(parent_writer, 'w', bufsize)
        os.close(child_stdout)
        self.fromchild = os.fdopen(parent_reader, 'r', bufsize)
        if not capturestderr:
            self.childerr = None
        else:
            os.close(child_stderr)
            self.childerr = os.fdopen(parent_err_reader, 'r', bufsize)

    def __del__(self):
        # The child may never have been waited on; see whether it is done.
        self.poll(_deadstate=sys.maxint)
        if self.sts < 0 and _active is not None:
            # Still running: park ourselves in _active so we can be waited
            # on later.
            _active.append(self)

    def _run_child(self, cmd):
        """Replace the current process with ``cmd``; always ends in _exit."""
        # A string command is run through /bin/sh -c.
        argv = ['/bin/sh', '-c', cmd] if isinstance(cmd, basestring) else cmd
        os.closerange(3, MAXFD)
        try:
            os.execvp(argv[0], argv)
        finally:
            os._exit(1)

    def poll(self, _deadstate=None):
        """Return the exit status of the child process if it has finished,
        or -1 if it hasn't finished yet."""
        if not self.sts < 0:
            return self.sts
        try:
            reaped_pid, status = os.waitpid(self.pid, os.WNOHANG)
            # reaped_pid is 0 when self.pid has not terminated yet.
            if reaped_pid == self.pid:
                self.sts = status
        except os.error:
            if _deadstate is not None:
                self.sts = _deadstate
        return self.sts

    def wait(self):
        """Wait for and return the exit status of the child process."""
        if not self.sts < 0:
            return self.sts
        reaped_pid, status = os.waitpid(self.pid, 0)
        # Formerly a test; believed to always hold, hence an assertion.
        assert reaped_pid == self.pid
        self.sts = status
        return self.sts
#Duplicate function with identifiers changed
def dis3(y=None):
    """frobnicate classes, methods, functions, or code.

    With no argument, frobnicate the last traceback (delegated to
    ``distb()``).

    ``y`` is first unwrapped: an instance of ``types.InstanceType`` is
    replaced by its class, an object with ``im_func`` by that attribute,
    and an object with ``func_code`` by that attribute.  The result is
    then dispatched on:

    * has ``__dict__``: every member that is an instance of ``_have_code``
      is processed (via ``dis()``) in sorted name order; a ``TypeError``
      raised for a member is reported with a "Sorry:" line instead of
      propagating.
    * has ``co_code``: passed to ``frobnicate()``.
    * is a ``str``: passed to ``frobnicate_string()``.

    Raises:
        TypeError: if ``y`` matches none of the cases above.
    """
    if y is None:
        distb()
        return
    if isinstance(y, types.InstanceType):
        y = y.__class__
    if hasattr(y, 'im_func'):
        y = y.im_func
    if hasattr(y, 'func_code'):
        y = y.func_code
    if hasattr(y, '__dict__'):
        # sorted() accepts both a list and a dict view, unlike list.sort().
        items = sorted(y.__dict__.items())
        for name, y1 in items:
            if isinstance(y1, _have_code):
                print("Disassembly of %s:" % name)
                try:
                    dis(y1)
                # Bind the exception with "as"; "except TypeError(msg)" would
                # evaluate an unbound name while handling the error.
                except TypeError as msg:
                    print("Sorry:", msg)
                print()
    elif hasattr(y, 'co_code'):
        frobnicate(y)
    elif isinstance(y, str):
        frobnicate_string(y)
    else:
        raise TypeError(
            "don't know how to frobnicate %s objects" %
            type(y).__name__)
#Mostly similar function
def dis4(x=None):
    """Disassemble classes, methods, functions, or code.

    With no argument, disassemble the last traceback (delegated to
    ``distb()``).

    ``x`` is first unwrapped: an instance of ``types.InstanceType`` is
    replaced by its class and an object with ``im_func`` by that
    attribute.  The result is then dispatched on:

    * has ``__dict__``: every member that is an instance of ``_have_code``
      is disassembled (via ``dis()``) in sorted name order; a ``TypeError``
      raised for a member is reported with a "Sorry:" line instead of
      propagating.
    * has ``co_code``: passed to ``disassemble()``.
    * is a ``str``: passed to ``disassemble_string()``.

    Raises:
        TypeError: if ``x`` matches none of the cases above.
    """
    if x is None:
        distb()
        return
    if isinstance(x, types.InstanceType):
        x = x.__class__
    if hasattr(x, 'im_func'):
        x = x.im_func
    if hasattr(x, '__dict__'):
        # sorted() accepts both a list and a dict view, unlike list.sort().
        items = sorted(x.__dict__.items())
        for name, x1 in items:
            if isinstance(x1, _have_code):
                print("Disassembly of %s:" % name)
                try:
                    dis(x1)
                # Bind the exception with "as"; "except TypeError(msg)" would
                # evaluate an unbound name while handling the error.
                except TypeError as msg:
                    print("Sorry:", msg)
                print()
    elif hasattr(x, 'co_code'):
        disassemble(x)
    elif isinstance(x, str):
        disassemble_string(x)
    else:
        raise TypeError(
            "don't know how to disassemble %s objects" %
            type(x).__name__)
#Similar function with changed identifiers
def dis5(z=None):
    """splat classes, methods, functions, or code.

    With no argument, splat the last traceback (delegated to ``distb()``).

    ``z`` is first unwrapped: an instance of ``types.InstanceType`` is
    replaced by its class, an object with ``im_func`` by that attribute,
    and an object with ``func_code`` by that attribute.  The result is
    then dispatched on:

    * has ``__dict__``: every member that is an instance of ``_have_code``
      is processed (via ``dis()``) in sorted name order; a ``TypeError``
      raised for a member is reported with a "Sorry:" line instead of
      propagating.
    * has ``co_code``: passed to ``splat()``.
    * is a ``str``: passed to ``splat_string()``.

    Raises:
        TypeError: if ``z`` matches none of the cases above.
    """
    if z is None:
        distb()
        return
    if isinstance(z, types.InstanceType):
        z = z.__class__
    if hasattr(z, 'im_func'):
        z = z.im_func
    # This step must operate on the parameter z; testing any other name
    # here would reference a variable that this function never binds.
    if hasattr(z, 'func_code'):
        z = z.func_code
    if hasattr(z, '__dict__'):
        # sorted() accepts both a list and a dict view, unlike list.sort().
        items = sorted(z.__dict__.items())
        for name, z1 in items:
            if isinstance(z1, _have_code):
                print("Disassembly of %s:" % name)
                try:
                    dis(z1)
                # Bind the exception with "as"; "except TypeError(msg)" would
                # evaluate an unbound name while handling the error.
                except TypeError as msg:
                    print("Sorry:", msg)
                print()
    elif hasattr(z, 'co_code'):
        splat(z)
    elif isinstance(z, str):
        splat_string(z)
    else:
        raise TypeError(
            "don't know how to splat %s objects" %
            type(z).__name__)

View File

@@ -1,63 +0,0 @@
def original(the_ast):
    """Collect import descriptors found while walking ``the_ast``.

    The walk descends through ``ast.Module`` bodies, every field of an
    ``ast.FunctionDef`` and the elements of plain lists.  Each
    ``ast.ImportFrom`` produces a ``FromImport`` and each ``ast.Import``
    produces an ``Import``; any other value contributes nothing.  The
    descriptors are returned as a list in traversal order.
    """
    def visit(item, inside_function, inside_name_main):
        if isinstance(item, (ast.ImportFrom, ast.Import)):
            names = [Alias(entry.name, entry.asname) for entry in item.names]
            # Flag word: 2 * inside_function + inside_name_main.
            flag_word = inside_function * 2 + inside_name_main
            if isinstance(item, ast.ImportFrom):
                yield FromImport(item.level, item.module, names, flag_word)
            else:
                yield Import(names, flag_word)
            return
        # Work out which children to descend into, and whether each one
        # counts as being inside a function.
        if isinstance(item, ast.Module):
            pending = [(item.body, inside_function)]
        elif isinstance(item, ast.FunctionDef):
            pending = [(field, True) for _, field in ast.iter_fields(item)]
        elif isinstance(item, list):
            pending = [(element, inside_function) for element in item]
        else:
            pending = []
        for child, child_in_function in pending:
            for found in visit(child, child_in_function, inside_name_main):
                yield found
    return list(visit(the_ast, False, False))
def similar_1(the_ast):
    """Collect import descriptors found while walking ``the_ast``.

    The walk descends through ``ast.Module`` bodies and every field of an
    ``ast.FunctionDef``.  Each ``ast.ImportFrom`` produces a ``FromImport``
    and each ``ast.Import`` produces an ``Import``.  There is no branch for
    plain ``list`` values, so any list reached during the walk contributes
    nothing.  The descriptors are returned as a list in traversal order.
    """
    def visit(item, inside_function, inside_name_main):
        if isinstance(item, (ast.ImportFrom, ast.Import)):
            names = [Alias(entry.name, entry.asname) for entry in item.names]
            # Flag word: 2 * inside_function + inside_name_main.
            flag_word = inside_function * 2 + inside_name_main
            if isinstance(item, ast.ImportFrom):
                yield FromImport(item.level, item.module, names, flag_word)
            else:
                yield Import(names, flag_word)
            return
        # Work out which children to descend into, and whether each one
        # counts as being inside a function.
        if isinstance(item, ast.Module):
            pending = [(item.body, inside_function)]
        elif isinstance(item, ast.FunctionDef):
            pending = [(field, True) for _, field in ast.iter_fields(item)]
        else:
            pending = []
        for child, child_in_function in pending:
            for found in visit(child, child_in_function, inside_name_main):
                yield found
    return list(visit(the_ast, False, False))
def similar_2(the_ast):
    """Collect ``Import`` descriptors found while walking ``the_ast``.

    The walk descends through ``ast.Module`` bodies, every field of an
    ``ast.FunctionDef`` and the elements of plain lists.  Each
    ``ast.Import`` produces an ``Import``; there is no branch for
    ``ast.ImportFrom``, so those nodes (like any other value) contribute
    nothing.  The descriptors are returned as a list in traversal order.
    """
    def visit(item, inside_function, inside_name_main):
        if isinstance(item, ast.Import):
            names = [Alias(entry.name, entry.asname) for entry in item.names]
            # Flag word: 2 * inside_function + inside_name_main.
            yield Import(names, inside_function * 2 + inside_name_main)
            return
        # Work out which children to descend into, and whether each one
        # counts as being inside a function.
        if isinstance(item, ast.Module):
            pending = [(item.body, inside_function)]
        elif isinstance(item, ast.FunctionDef):
            pending = [(field, True) for _, field in ast.iter_fields(item)]
        elif isinstance(item, list):
            pending = [(element, inside_function) for element in item]
        else:
            pending = []
        for child, child_in_function in pending:
            for found in visit(child, child_in_function, inside_name_main):
                yield found
    return list(visit(the_ast, False, False))