Go: add discard predicates for XML entities

This is adapted from the implementation for Java.

Since the HTML/XML extractor is not (yet) incremental, it will extract
files that were not in the diff. These discard predicates are intended
to cope with that, while also being robust against a future version
where the extractor *is* overlay-aware.
This commit is contained in:
Nick Rolfe
2025-10-31 17:09:31 +00:00
parent 10fa1d650d
commit 734cba7b9c

View File

@@ -50,3 +50,40 @@ private predicate discardLocatable(@locatable locatable) {
discardableLocatable(file, locatable) and discardableFile(path)
)
}
/**
 * Gets the file recorded in `locations_default` for a location that `xmllocations` associates
 * with the given XML `locatable`.
 */
private @file getXmlFile(@xmllocatable locatable) {
  exists(@location_default loc |
    xmllocations(locatable, loc) and
    locations_default(loc, result, _, _, _, _)
  )
}
/**
 * Gets the file containing the given XML `locatable`, restricted to evaluations in which
 * `isOverlay()` does not hold.
 */
private @file getXmlFileInBase(@xmllocatable locatable) {
  result = getXmlFile(locatable) and
  not isOverlay()
}
/**
 * Holds if `file` is part of the overlay and the HTML/XML extractor produced at least one
 * entity located in it.
 *
 * Locatables that are themselves files or XML namespaces are not counted as evidence.
 * NOTE(review): presumably because such entities are not tied to the extraction of a single
 * file; confirm against the extractor.
 */
private predicate overlayXmlExtracted(@file file) {
  isOverlay() and
  exists(@xmllocatable entity |
    file = getXmlFile(entity) and
    not (
      files(entity, _)
      or
      xmlNs(entity, _, _, _)
    )
  )
}
/**
 * Holds if the XML `locatable` comes from the overlay base and must be discarded, because the
 * file that contains it was extracted again as part of the overlay database.
 */
overlay[discard_entity]
private predicate discardXmlLocatable(@xmllocatable locatable) {
  exists(@file baseFile | baseFile = getXmlFileInBase(locatable) |
    // The HTML/XML extractor is currently not incremental, so it may have re-extracted files
    // beyond those listed in overlayChangedFiles.
    overlayXmlExtracted(baseFile)
    or
    // The file is explicitly listed as changed in the overlay.
    exists(string changedPath |
      overlayChangedFiles(changedPath) and
      files(baseFile, changedPath)
    )
  )
}