From 3c63df2221a17dc15419fd7c67c73cc27db30c87 Mon Sep 17 00:00:00 2001 From: Dave Bartolomeo Date: Tue, 26 Sep 2023 17:54:47 -0400 Subject: [PATCH] Unit tests for `SplitBuffer` --- .../ql-vscode/src/common/split-stream.ts | 24 +++++- .../unit-tests/common/split-buffer.test.ts | 84 +++++++++++++++++++ 2 files changed, 104 insertions(+), 4 deletions(-) create mode 100644 extensions/ql-vscode/test/unit-tests/common/split-buffer.test.ts diff --git a/extensions/ql-vscode/src/common/split-stream.ts b/extensions/ql-vscode/src/common/split-stream.ts index 77aa5b0cb..73c425379 100644 --- a/extensions/ql-vscode/src/common/split-stream.ts +++ b/extensions/ql-vscode/src/common/split-stream.ts @@ -4,11 +4,12 @@ import { StringDecoder } from "string_decoder"; /** * Buffer to hold state used when splitting a text stream into lines. */ -class SplitBuffer { +export class SplitBuffer { private readonly decoder = new StringDecoder("utf8"); private readonly maxSeparatorLength: number; private buffer = ""; private searchIndex = 0; + private ended = false; constructor(private readonly separators: readonly string[]) { this.maxSeparatorLength = separators @@ -29,7 +30,7 @@ class SplitBuffer { */ public end(): void { this.buffer += this.decoder.end(); - this.buffer += this.separators[0]; // Append a separator to the end to ensure the last line is returned. + this.ended = true; } /** @@ -56,7 +57,14 @@ class SplitBuffer { * line is available. */ public getNextLine(): string | undefined { - while (this.searchIndex <= this.buffer.length - this.maxSeparatorLength) { + // If we haven't received all of the input yet, don't search too close to the end of the buffer, + // or we could match a separator that's split across two chunks. For example, we could see "\r" + // at the end of the buffer and match that, even though we were about to receive a "\n" right + // after it. + const maxSearchIndex = this.ended + ? 
this.buffer.length - 1 + : this.buffer.length - this.maxSeparatorLength; + while (this.searchIndex <= maxSearchIndex) { for (const separator of this.separators) { if (SplitBuffer.startsWith(this.buffer, separator, this.searchIndex)) { const line = this.buffer.slice(0, this.searchIndex); @@ -68,7 +76,15 @@ class SplitBuffer { this.searchIndex++; } - return undefined; + if (this.ended && this.buffer.length > 0) { + // If we still have some text left in the buffer, return it as the last line. + const line = this.buffer; + this.buffer = ""; + this.searchIndex = 0; + return line; + } else { + return undefined; + } } } diff --git a/extensions/ql-vscode/test/unit-tests/common/split-buffer.test.ts b/extensions/ql-vscode/test/unit-tests/common/split-buffer.test.ts new file mode 100644 index 000000000..32df4c05b --- /dev/null +++ b/extensions/ql-vscode/test/unit-tests/common/split-buffer.test.ts @@ -0,0 +1,84 @@ +import { LINE_ENDINGS, SplitBuffer } from "../../../src/common/split-stream"; + +interface Chunk { + chunk: string; + lines: string[]; +} + +function checkLines( + buffer: SplitBuffer, + expectedLinesForChunk: string[], + chunkIndex: number | "end", +): void { + expectedLinesForChunk.forEach((expectedLine, lineIndex) => { + const line = buffer.getNextLine(); + const location = `[chunk ${chunkIndex}, line ${lineIndex}]: `; + expect(location + line).toEqual(location + expectedLine); + }); + expect(buffer.getNextLine()).toBeUndefined(); +} + +function testSplitBuffer(chunks: Chunk[], endLines: string[]): void { + const buffer = new SplitBuffer(LINE_ENDINGS); + chunks.forEach((chunk, chunkIndex) => { + buffer.addChunk(Buffer.from(chunk.chunk, "utf-8")); + checkLines(buffer, chunk.lines, chunkIndex); + }); + buffer.end(); + checkLines(buffer, endLines, "end"); +} + +describe("split buffer", () => { + it("should handle a one-chunk string with no terminator", async () => { + // Won't return the line until we call `end()`. 
+ testSplitBuffer([{ chunk: "some text", lines: [] }], ["some text"]); + }); + + it("should handle a one-chunk string with a one-byte terminator", async () => { + // Won't return the line until we call `end()` because the actual terminator is shorter than the + // longest terminator. + testSplitBuffer([{ chunk: "some text\n", lines: [] }], ["some text"]); + }); + + it("should handle a one-chunk string with a two-byte terminator", async () => { + testSplitBuffer([{ chunk: "some text\r\n", lines: ["some text"] }], []); + }); + + it("should handle a multi-chunk string with terminators at the end of each chunk", async () => { + testSplitBuffer( + [ + { chunk: "first line\n", lines: [] }, // Waiting for second potential terminator byte + { chunk: "second line\r", lines: ["first line"] }, // Waiting for second potential terminator byte + { chunk: "third line\r\n", lines: ["second line", "third line"] }, // No waiting needed: "\r\n" is already the longest terminator + ], + [], + ); + }); + + it("should handle a multi-chunk string with terminators at random offsets", async () => { + testSplitBuffer( + [ + { chunk: "first line\nsecond", lines: ["first line"] }, + { + chunk: " line\rthird line", + lines: ["second line"], + }, + { chunk: "\r\n", lines: ["third line"] }, + ], + [], + ); + }); + + it("should handle a terminator split between chunks", async () => { + testSplitBuffer( + [ + { chunk: "first line\r", lines: [] }, + { + chunk: "\nsecond line", + lines: ["first line"], + }, + ], + ["second line"], + ); + }); +});