Unit tests for SplitBuffer

This commit is contained in:
Dave Bartolomeo
2023-09-26 17:54:47 -04:00
parent c972a5c0de
commit 3c63df2221
2 changed files with 104 additions and 4 deletions

View File

@@ -4,11 +4,12 @@ import { StringDecoder } from "string_decoder";
/**
* Buffer to hold state used when splitting a text stream into lines.
*/
class SplitBuffer {
export class SplitBuffer {
private readonly decoder = new StringDecoder("utf8");
private readonly maxSeparatorLength: number;
private buffer = "";
private searchIndex = 0;
private ended = false;
constructor(private readonly separators: readonly string[]) {
this.maxSeparatorLength = separators
@@ -29,7 +30,7 @@ class SplitBuffer {
*/
public end(): void {
this.buffer += this.decoder.end();
this.buffer += this.separators[0]; // Append a separator to the end to ensure the last line is returned.
this.ended = true;
}
/**
@@ -56,7 +57,14 @@ class SplitBuffer {
* line is available.
*/
public getNextLine(): string | undefined {
while (this.searchIndex <= this.buffer.length - this.maxSeparatorLength) {
// If we haven't received all of the input yet, don't search too close to the end of the buffer,
// or we could match a separator that's split across two chunks. For example, we could see "\r"
// at the end of the buffer and match that, even though we were about to receive a "\n" right
// after it.
const maxSearchIndex = this.ended
? this.buffer.length - 1
: this.buffer.length - this.maxSeparatorLength;
while (this.searchIndex <= maxSearchIndex) {
for (const separator of this.separators) {
if (SplitBuffer.startsWith(this.buffer, separator, this.searchIndex)) {
const line = this.buffer.slice(0, this.searchIndex);
@@ -68,8 +76,16 @@ class SplitBuffer {
this.searchIndex++;
}
if (this.ended && this.buffer.length > 0) {
// If we still have some text left in the buffer, return it as the last line.
const line = this.buffer;
this.buffer = "";
this.searchIndex = 0;
return line;
} else {
return undefined;
}
}
}
/**

View File

@@ -0,0 +1,84 @@
import { LINE_ENDINGS, SplitBuffer } from "../../../src/common/split-stream";
/**
 * One step of a `SplitBuffer` test scenario: a piece of input text, paired with the lines that
 * should be available from the buffer immediately after that piece has been added.
 */
interface Chunk {
  /** Text to feed to the buffer; it is UTF-8 encoded before being passed to `addChunk()`. */
  chunk: string;
  /** Lines expected from `getNextLine()`, in order, after this chunk is added. */
  lines: string[];
}
/**
 * Drains the lines currently available from `buffer` and verifies them against the expectation.
 *
 * Each expected line is compared with the next line the buffer yields. Both sides of the comparison
 * are prefixed with the chunk/line position so that a failing assertion message identifies where
 * the mismatch happened. After the expected lines are consumed, the buffer must report that no
 * further line is available.
 *
 * @param buffer The buffer under test.
 * @param expectedLinesForChunk The lines that should be available right now, in order.
 * @param chunkIndex Index of the chunk that was just added, or `"end"` when checking after `end()`.
 */
function checkLines(
  buffer: SplitBuffer,
  expectedLinesForChunk: string[],
  chunkIndex: number | "end",
): void {
  for (const [lineIndex, expectedLine] of expectedLinesForChunk.entries()) {
    const actualLine = buffer.getNextLine();
    // Identical prefix on both sides: it never affects equality, only the failure output.
    const position = `[chunk ${chunkIndex}, line ${lineIndex}]: `;
    expect(position + actualLine).toEqual(position + expectedLine);
  }

  // Nothing beyond the expected lines may be available yet.
  const leftover = buffer.getNextLine();
  expect(leftover).toBeUndefined();
}
/**
 * Runs one complete scenario against a fresh `SplitBuffer` created with `LINE_ENDINGS`.
 *
 * The chunks are added one at a time; after each one, the lines listed for that chunk must be
 * exactly the lines the buffer makes available. Finally `end()` is called and the buffer must yield
 * exactly `endLines`.
 *
 * @param chunks The input pieces, each with the lines expected right after it is added.
 * @param endLines The lines expected only once the end of input has been signalled.
 */
function testSplitBuffer(chunks: Chunk[], endLines: string[]): void {
  const splitter = new SplitBuffer(LINE_ENDINGS);

  for (const [chunkIndex, { chunk: text, lines: expectedLines }] of chunks.entries()) {
    splitter.addChunk(Buffer.from(text, "utf-8"));
    checkLines(splitter, expectedLines, chunkIndex);
  }

  // Signal end of input, then verify whatever the buffer was still holding back.
  splitter.end();
  checkLines(splitter, endLines, "end");
}
// Behavioral tests for `SplitBuffer`. Each case feeds chunks through `testSplitBuffer`, listing the
// lines expected to become available after each chunk and, separately, after `end()`.
//
// Until `end()` is called, the buffer does not match a separator that sits so close to the end of
// the buffered text that it could still be the start of a longer separator (for example a trailing
// "\r" that may be followed by "\n" in the next chunk). Several cases below therefore expect a line
// to show up one step later than its terminator. The expectations assume the longest entry in
// `LINE_ENDINGS` is the two-character "\r\n".
//
// The test callbacks are deliberately synchronous: nothing in them is awaited.
describe("split buffer", () => {
  it("should handle a one-chunk string with no terminator", () => {
    // Won't return the line until we call `end()`.
    testSplitBuffer([{ chunk: "some text", lines: [] }], ["some text"]);
  });

  it("should handle a one-chunk string with a one-byte terminator", () => {
    // Won't return the line until we call `end()` because the actual terminator is shorter than the
    // longest terminator.
    testSplitBuffer([{ chunk: "some text\n", lines: [] }], ["some text"]);
  });

  it("should handle a one-chunk string with a two-byte terminator", () => {
    testSplitBuffer([{ chunk: "some text\r\n", lines: ["some text"] }], []);
  });

  it("should handle a multi-chunk string with terminators at the end of each chunk", () => {
    testSplitBuffer(
      [
        { chunk: "first line\n", lines: [] }, // Waiting for second potential terminator byte
        { chunk: "second line\r", lines: ["first line"] }, // Waiting for second potential terminator byte
        // No wait for "third line": its "\r\n" terminator is already the longest separator, so it
        // cannot be the start of a longer one.
        { chunk: "third line\r\n", lines: ["second line", "third line"] },
      ],
      [],
    );
  });

  it("should handle a multi-chunk string with terminators at random offsets", () => {
    testSplitBuffer(
      [
        { chunk: "first line\nsecond", lines: ["first line"] },
        {
          chunk: " line\rthird line",
          lines: ["second line"],
        },
        { chunk: "\r\n", lines: ["third line"] },
      ],
      [],
    );
  });

  it("should handle a terminator split between chunks", () => {
    testSplitBuffer(
      [
        // The trailing "\r" must not be matched yet: the next chunk completes it to "\r\n".
        { chunk: "first line\r", lines: [] },
        {
          chunk: "\nsecond line",
          lines: ["first line"],
        },
      ],
      ["second line"],
    );
  });
});