using Microsoft.CodeAnalysis;
using Microsoft.CodeAnalysis.Text;
using Semmle.Util;
using System.Collections.Generic;
using System.Linq;

namespace Semmle.Extraction.CommentProcessing
{
    /// <summary>
    /// Implements the comment processor for associating comments with program elements.
    /// Registers locations of comments and program elements,
    /// then generates binding information.
    /// </summary>
    class CommentProcessor : ICommentGenerator
    {
        /// <summary>
        /// Registers a comment line, keyed by its location.
        /// A line registered later at an identical location replaces the earlier one.
        /// </summary>
        /// <param name="comment">The comment line to register.</param>
        public void AddComment(ICommentLine comment)
        {
            comments[comment.Location] = comment;
        }

        // Comments sorted by location.
        private readonly SortedDictionary<Location, ICommentLine> comments =
            new SortedDictionary<Location, ICommentLine>(new LocationComparer());

        // Program elements sorted by location.
        private readonly SortedDictionary<Location, Label> elements =
            new SortedDictionary<Location, Label>(new LocationComparer());

        // Duplication guard keys of the elements that have one, indexed by element label.
        private readonly Dictionary<Label, Key> duplicationGuardKeys = new Dictionary<Label, Key>();

        /// <summary>
        /// Looks up the duplication guard key registered for an element.
        /// </summary>
        /// <param name="label">The label of the element.</param>
        /// <returns>The registered key, or null if the element has none.</returns>
        private Key GetDuplicationGuardKey(Label label)
        {
            Key duplicationGuardKey;
            return duplicationGuardKeys.TryGetValue(label, out duplicationGuardKey)
                ? duplicationGuardKey
                : null;
        }

        // Adapts CommentProcessor.Compare so that it can order the sorted dictionaries.
        class LocationComparer : IComparer<Location>
        {
            public int Compare(Location l1, Location l2) => CommentProcessor.Compare(l1, l2);
        }

        /// <summary>
        /// Comparer for two locations, allowing them to be inserted into a sorted list.
        /// Locations are ordered by file path, then by span start, then by span end.
        /// </summary>
        /// <param name="l1">First location</param>
        /// <param name="l2">Second location</param>
        /// <returns>&lt;0 if l1 before l2, &gt;0 if l1 after l2, else 0.</returns>
        static int Compare(Location l1, Location l2)
        {
            // File paths are identifiers, not linguistic text: compare them ordinally so that
            // the ordering does not depend on the current culture and distinct paths never
            // compare as equal.
            int diff = l1.SourceTree == l2.SourceTree
                ? 0
                : string.CompareOrdinal(l1.SourceTree.FilePath, l2.SourceTree.FilePath);
            if (diff != 0)
                return diff;

            diff = l1.SourceSpan.Start - l2.SourceSpan.Start;
            if (diff != 0)
                return diff;

            return l1.SourceSpan.End - l2.SourceSpan.End;
        }

        /// <summary>
        /// Called by the populator when there is a program element which can have comments.
        /// </summary>
        /// <param name="elementLabel">The label of the element in the trap file.</param>
        /// <param name="duplicationGuardKey">The duplication guard key of the element, if any.</param>
        /// <param name="loc">The location of the element.</param>
        public void AddElement(Label elementLabel, Key duplicationGuardKey, Location loc)
        {
            // Only elements with a source location can be related to comments.
            if (loc != null && loc.IsInSource)
                elements[loc] = elementLabel;

            if (duplicationGuardKey != null)
                duplicationGuardKeys[elementLabel] = duplicationGuardKey;
        }

        // Discards (sets to null) an element that lies in a different file than commentBlock.
        // Such a mismatch can happen when processing multiple files.
        void EnsureSameFile(ICommentBlock commentBlock, ref KeyValuePair<Location, Label>? element)
        {
            if (element != null && element.Value.Key.SourceTree != commentBlock.Location.SourceTree)
                element = null;
        }

        /// <summary>
        /// Generate the bindings between a comment and program elements.
        /// Called once for each commentBlock.
        /// </summary>
        /// <param name="commentBlock">The comment block.</param>
        /// <param name="previousElement">The element before the comment block.</param>
        /// <param name="nextElement">The element after the comment block.</param>
        /// <param name="parentElement">The parent element of the comment block.</param>
        /// <param name="callback">Output binding information.</param>
        void GenerateBindings(
            ICommentBlock commentBlock,
            KeyValuePair<Location, Label>? previousElement,
            KeyValuePair<Location, Label>? nextElement,
            KeyValuePair<Location, Label>? parentElement,
            CommentBindingCallback callback
            )
        {
            EnsureSameFile(commentBlock, ref previousElement);
            EnsureSameFile(commentBlock, ref nextElement);
            EnsureSameFile(commentBlock, ref parentElement);

            if (previousElement != null)
            {
                var key = previousElement.Value.Value;
                callback(key, GetDuplicationGuardKey(key), commentBlock, CommentBinding.Before);
            }

            if (nextElement != null)
            {
                var key = nextElement.Value.Value;
                callback(key, GetDuplicationGuardKey(key), commentBlock, CommentBinding.After);
            }

            if (parentElement != null)
            {
                var key = parentElement.Value.Value;
                callback(key, GetDuplicationGuardKey(key), commentBlock, CommentBinding.Parent);
            }

            // Heuristic to decide which is the "best" element associated with the comment.
            KeyValuePair<Location, Label>? bestElement;

            if (previousElement != null &&
                previousElement.Value.Key.EndLine() == commentBlock.Location.StartLine())
            {
                // 1. If the comment is on the same line as the previous element, use that
                bestElement = previousElement;
            }
            else if (nextElement != null &&
                nextElement.Value.Key.StartLine() == commentBlock.Location.EndLine())
            {
                // 2. If the comment is on the same line as the next element, use that
                bestElement = nextElement;
            }
            else if (nextElement != null && previousElement != null &&
                previousElement.Value.Key.EndLine() + 1 == commentBlock.Location.StartLine() &&
                commentBlock.Location.EndLine() + 1 == nextElement.Value.Key.StartLine())
            {
                // 3. If comment is equally between two elements, use the parentElement
                // because it's ambiguous whether the comment refers to the next or previous element
                bestElement = parentElement;
            }
            else if (nextElement != null &&
                nextElement.Value.Key.StartLine() == commentBlock.Location.EndLine() + 1)
            {
                // 4. If there is no gap after the comment, use "nextElement"
                bestElement = nextElement;
            }
            else if (previousElement != null &&
                previousElement.Value.Key.EndLine() + 1 == commentBlock.Location.StartLine())
            {
                // 5. If there is no gap before the comment, use previousElement
                bestElement = previousElement;
            }
            else
            {
                // 6. Otherwise, bind the comment to the parent block.
                bestElement = parentElement;

                /* if parentElement==null, then there is no best element. The comment is effectively orphaned.
                 *
                 * This can be caused by comments that are not in a type declaration.
                 * Due to restrictions in the dbscheme, the comment cannot be associated with the "file"
                 * which is not an element, and the "using" declarations are not emitted by the extractor.
                 */
            }

            if (bestElement != null)
            {
                var label = bestElement.Value.Value;
                callback(label, GetDuplicationGuardKey(label), commentBlock, CommentBinding.Best);
            }
        }

        // Stores element nesting information in a stack.
        // Top of stack = most nested element, based on Location.
        private class ElementStack
        {
            // Invariant: the top of the stack must be contained by items below it.
            readonly Stack<KeyValuePair<Location, Label>> elementStack =
                new Stack<KeyValuePair<Location, Label>>();

            /// <summary>
            /// Add a new element to the stack.
            /// </summary>
            /// <remarks>The stack invariant is maintained.</remarks>
            /// <param name="value">The new element to push.</param>
            public void Push(KeyValuePair<Location, Label> value)
            {
                // Maintain the invariant by popping existing elements
                // until the top of the stack contains the new element (or the stack is empty).
                while (elementStack.Count > 0 && !elementStack.Peek().Key.Contains(value.Key))
                    elementStack.Pop();

                elementStack.Push(value);
            }

            /// <summary>
            /// Locate the parent of a comment with location l.
            /// </summary>
            /// <param name="l">The location of the comment.</param>
            /// <returns>An element completely containing l, or null if none found.</returns>
            public KeyValuePair<Location, Label>? FindParent(Location l) =>
                // Stack<T> enumerates from the top down, so the first match is the
                // most nested element on the stack that contains l.
                elementStack.Where(v => v.Key.Contains(l)).FirstOrNull();

            /// <summary>
            /// Finds the element on the stack immediately preceding the comment at l.
            /// </summary>
            /// <param name="l">The location of the comment.</param>
            /// <returns>The element before l, or null.</returns>
            public KeyValuePair<Location, Label>? FindBefore(Location l)
            {
                // Among the stack entries ending before the comment starts, the last one in
                // enumeration order (i.e. the one nearest the bottom of the stack) is chosen.
                return elementStack.
                    Where(v => v.Key.SourceSpan.End < l.SourceSpan.Start).
                    LastOrNull();
            }

            /// <summary>
            /// Finds the element after the comment.
            /// </summary>
            /// <param name="comment">The location of the comment.</param>
            /// <param name="next">The next element.</param>
            /// <returns>
            /// The next element, or null if the comment's parent lies before the next element
            /// (as decided by the Before helper).
            /// </returns>
            public KeyValuePair<Location, Label>? FindAfter(Location comment, KeyValuePair<Location, Label>? next)
            {
                var p = FindParent(comment);
                return next.HasValue && p.HasValue && p.Value.Key.Before(next.Value.Key) ? null : next;
            }
        }

        // Generate binding information for one CommentBlock.
        // Empty blocks (no comment lines) are ignored.
        private void GenerateBindings(
            ICommentBlock block,
            ElementStack elementStack,
            KeyValuePair<Location, Label>? nextElement,
            CommentBindingCallback cb
            )
        {
            if (block.CommentLines.Any())
            {
                GenerateBindings(
                    block,
                    elementStack.FindBefore(block.Location),
                    elementStack.FindAfter(block.Location, nextElement),
                    elementStack.FindParent(block.Location),
                    cb);
            }
        }

        /// <summary>
        /// Process comments up until nextElement.
        /// Group comments into blocks, and associate blocks with elements.
        /// </summary>
        /// <param name="commentEnumerator">
        /// Enumerator for all comments in the program. It must already be positioned on a
        /// valid comment, because Current is read before MoveNext is called.
        /// </param>
        /// <param name="nextElement">The next element in the list.</param>
        /// <param name="elementStack">A stack of nested program elements.</param>
        /// <param name="cb">Where to send the results.</param>
        /// <returns>true if there are more comments to process, false otherwise.</returns>
        bool GenerateBindings(
            IEnumerator<KeyValuePair<Location, ICommentLine>> commentEnumerator,
            KeyValuePair<Location, Label>? nextElement,
            ElementStack elementStack,
            CommentBindingCallback cb
            )
        {
            CommentBlock block = new CommentBlock();

            // Iterate comments until the commentEnumerator has gone past nextElement
            while (nextElement == null ||
                Compare(commentEnumerator.Current.Value.Location, nextElement.Value.Key) < 0)
            {
                if (!block.CombinesWith(commentEnumerator.Current.Value))
                {
                    // Start of a new block, so generate the bindings for the old block first.
                    GenerateBindings(block, elementStack, nextElement, cb);
                    block = new CommentBlock();
                }

                block.AddCommentLine(commentEnumerator.Current.Value);

                // Get the next comment.
                if (!commentEnumerator.MoveNext())
                {
                    // If there are no more comments, generate the remaining bindings and return false.
                    GenerateBindings(block, elementStack, nextElement, cb);
                    return false;
                }
            }

            GenerateBindings(block, elementStack, nextElement, cb);
            return true;
        }

        /// <summary>
        /// Merge comments into blocks and associate comment blocks with program elements.
        /// </summary>
        /// <param name="cb">Callback for the binding information</param>
        public void GenerateBindings(CommentBindingCallback cb)
        {
            /* Algorithm:
             * Do a merge of elements and comments, which are both sorted in location order.
             *
             * Iterate through all elements, and iterate all comment lines between adjacent pairs of elements.
             * Maintain a stack of elements, such that the top of the stack must be fully nested in the
             * element below it. This enables comments to be associated with the "parent" element, as well as
             * elements before, after and "best" element match for a comment.
             *
             * This is an O(n) algorithm because the list of elements and comments are traversed once.
             * (Note that comment processing is O(n.log n) overall due to dictionary of elements and comments.)
             */

            ElementStack elementStack = new ElementStack();

            using (IEnumerator<KeyValuePair<Location, Label>> elementEnumerator = elements.GetEnumerator())
            using (IEnumerator<KeyValuePair<Location, ICommentLine>> commentEnumerator = comments.GetEnumerator())
            {
                if (!commentEnumerator.MoveNext())
                {
                    // There are no comments to process.
                    return;
                }

                while (elementEnumerator.MoveNext())
                {
                    if (!GenerateBindings(commentEnumerator, elementEnumerator.Current, elementStack, cb))
                    {
                        // No more comments to process.
                        return;
                    }

                    elementStack.Push(elementEnumerator.Current);
                }

                // Generate remaining comments at end of file
                GenerateBindings(commentEnumerator, null, elementStack, cb);
            }
        }
    }

    /// <summary>
    /// A run of comment lines that are treated as a single comment, together with the
    /// location spanning from the first line to the most recently added line.
    /// </summary>
    class CommentBlock : ICommentBlock
    {
        private readonly List<ICommentLine> lines = new List<ICommentLine>();

        public IEnumerable<ICommentLine> CommentLines => lines;

        public Location Location { get; private set; }

        /// <summary>
        /// Determine whether comment lines should be merged.
        /// </summary>
        /// <param name="newLine">A comment line to be appended to this comment block.</param>
        /// <returns>Whether the new line should be appended to this block.</returns>
        public bool CombinesWith(ICommentLine newLine)
        {
            // An empty block accepts any line.
            if (!CommentLines.Any())
                return true;

            bool sameFile = Location.SourceTree == newLine.Location.SourceTree;

            // The new line starts on the row where this block ends.
            bool sameRow = Location.EndLine() == newLine.Location.StartLine();

            // The new line starts on the row directly below this block...
            bool nextRow = Location.EndLine() + 1 == newLine.Location.StartLine();

            // ...and is aligned with the column where this block starts.
            bool sameColumn = Location.StartColumn() == newLine.Location.StartColumn();

            bool adjacent = sameFile && (sameRow || (nextRow && sameColumn));

            // Continuation lines of a multi-line comment always belong to the current block.
            return newLine.Type == CommentLineType.MultilineContinuation || adjacent;
        }

        /// <summary>
        /// Adds a comment line to this comment block.
        /// </summary>
        /// <param name="line">The line to add.</param>
        public void AddCommentLine(ICommentLine line)
        {
            // The first line defines the block's location; each later line extends the span
            // so that it runs from the start of the block to the end of the new line.
            Location = !lines.Any()
                ? line.Location
                : Location.Create(
                    line.Location.SourceTree,
                    new TextSpan(Location.SourceSpan.Start, line.Location.SourceSpan.End - Location.SourceSpan.Start));

            lines.Add(line);
        }
    }
}