Unit tests for SplitBuffer
This commit is contained in:
@@ -4,11 +4,12 @@ import { StringDecoder } from "string_decoder";
|
||||
/**
|
||||
* Buffer to hold state used when splitting a text stream into lines.
|
||||
*/
|
||||
class SplitBuffer {
|
||||
export class SplitBuffer {
|
||||
private readonly decoder = new StringDecoder("utf8");
|
||||
private readonly maxSeparatorLength: number;
|
||||
private buffer = "";
|
||||
private searchIndex = 0;
|
||||
private ended = false;
|
||||
|
||||
constructor(private readonly separators: readonly string[]) {
|
||||
this.maxSeparatorLength = separators
|
||||
@@ -29,7 +30,7 @@ class SplitBuffer {
|
||||
*/
|
||||
public end(): void {
|
||||
this.buffer += this.decoder.end();
|
||||
this.buffer += this.separators[0]; // Append a separator to the end to ensure the last line is returned.
|
||||
this.ended = true;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -56,7 +57,14 @@ class SplitBuffer {
|
||||
* line is available.
|
||||
*/
|
||||
public getNextLine(): string | undefined {
|
||||
while (this.searchIndex <= this.buffer.length - this.maxSeparatorLength) {
|
||||
// If we haven't received all of the input yet, don't search too close to the end of the buffer,
|
||||
// or we could match a separator that's split across two chunks. For example, we could see "\r"
|
||||
// at the end of the buffer and match that, even though we were about to receive a "\n" right
|
||||
// after it.
|
||||
const maxSearchIndex = this.ended
|
||||
? this.buffer.length - 1
|
||||
: this.buffer.length - this.maxSeparatorLength;
|
||||
while (this.searchIndex <= maxSearchIndex) {
|
||||
for (const separator of this.separators) {
|
||||
if (SplitBuffer.startsWith(this.buffer, separator, this.searchIndex)) {
|
||||
const line = this.buffer.slice(0, this.searchIndex);
|
||||
@@ -68,7 +76,15 @@ class SplitBuffer {
|
||||
this.searchIndex++;
|
||||
}
|
||||
|
||||
return undefined;
|
||||
if (this.ended && this.buffer.length > 0) {
|
||||
// If we still have some text left in the buffer, return it as the last line.
|
||||
const line = this.buffer;
|
||||
this.buffer = "";
|
||||
this.searchIndex = 0;
|
||||
return line;
|
||||
} else {
|
||||
return undefined;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -0,0 +1,84 @@
|
||||
import { LINE_ENDINGS, SplitBuffer } from "../../../src/common/split-stream";
|
||||
|
||||
/**
 * One step of a split-buffer test scenario: a piece of input text together with
 * the lines that are expected to become available once that piece has been added.
 */
interface Chunk {
  /** Text fed to the buffer for this step. */
  chunk: string;
  /** Lines expected to be returned, in order, right after this chunk is added. */
  lines: string[];
}
|
||||
|
||||
/**
 * Pulls lines out of `buffer` and asserts that they match the expected lines, in
 * order, and that no further line is available afterwards.
 *
 * @param buffer The buffer under test.
 * @param expectedLinesForChunk The lines that should be available right now.
 * @param chunkIndex Index of the chunk that was just added, or `"end"` once the
 *   input has been ended. Used only to label assertion failures.
 */
function checkLines(
  buffer: SplitBuffer,
  expectedLinesForChunk: readonly string[],
  chunkIndex: number | "end",
): void {
  expectedLinesForChunk.forEach((expectedLine, lineIndex) => {
    const line = buffer.getNextLine();
    // Prefix both sides with the location so a failure message says where it happened.
    const location = `[chunk ${chunkIndex}, line ${lineIndex}]: `;
    // JSON-encode both values: a missing line renders as bare `undefined` while
    // the text "undefined" renders quoted, so the two can never compare equal,
    // and control characters such as "\r" show up escaped in failure output.
    expect(`${location}${JSON.stringify(line)}`).toEqual(
      `${location}${JSON.stringify(expectedLine)}`,
    );
  });
  // Every expected line has been consumed; nothing more may be available yet.
  expect(buffer.getNextLine()).toBeUndefined();
}
|
||||
|
||||
/**
 * Runs one scenario against a fresh `SplitBuffer` built from `LINE_ENDINGS`.
 *
 * Each chunk is added as a UTF-8 encoded `Buffer`, and the lines available
 * immediately afterwards are checked. Finally the buffer is ended and the
 * remaining lines are checked.
 *
 * @param chunks Input pieces, each paired with the lines expected after adding it.
 * @param endLines Lines expected only after `end()` has been called.
 */
function testSplitBuffer(chunks: Chunk[], endLines: string[]): void {
  const buffer = new SplitBuffer(LINE_ENDINGS);
  chunks.forEach((chunk, chunkIndex) => {
    buffer.addChunk(Buffer.from(chunk.chunk, "utf-8"));
    checkLines(buffer, chunk.lines, chunkIndex);
  });
  buffer.end();
  checkLines(buffer, endLines, "end");
}
|
||||
|
||||
// Scenarios for SplitBuffer. Each case lists the chunks fed to the buffer with
// the lines expected right after each chunk, followed by the lines expected
// only after the input has been ended.
describe("split buffer", () => {
  it("should handle a one-chunk string with no terminator", () => {
    // Won't return the line until we call `end()`.
    testSplitBuffer([{ chunk: "some text", lines: [] }], ["some text"]);
  });

  it("should handle a one-chunk string with a one-byte terminator", () => {
    // Won't return the line until we call `end()` because the actual terminator is shorter than the
    // longest terminator.
    testSplitBuffer([{ chunk: "some text\n", lines: [] }], ["some text"]);
  });

  it("should handle a one-chunk string with a two-byte terminator", () => {
    testSplitBuffer([{ chunk: "some text\r\n", lines: ["some text"] }], []);
  });

  it("should handle a multi-chunk string with terminators at the end of each chunk", () => {
    testSplitBuffer(
      [
        // Waiting for a second potential terminator byte.
        { chunk: "first line\n", lines: [] },
        // The first line is released; the trailing "\r" is still waiting for a
        // second potential terminator byte.
        { chunk: "second line\r", lines: ["first line"] },
        // No wait here: the complete two-byte terminator is already in the buffer.
        { chunk: "third line\r\n", lines: ["second line", "third line"] },
      ],
      [],
    );
  });

  it("should handle a multi-chunk string with terminators at random offsets", () => {
    testSplitBuffer(
      [
        { chunk: "first line\nsecond", lines: ["first line"] },
        {
          chunk: " line\rthird line",
          lines: ["second line"],
        },
        { chunk: "\r\n", lines: ["third line"] },
      ],
      [],
    );
  });

  it("should handle a terminator split between chunks", () => {
    testSplitBuffer(
      [
        { chunk: "first line\r", lines: [] },
        {
          chunk: "\nsecond line",
          lines: ["first line"],
        },
      ],
      ["second line"],
    );
  });
});
|
||||
Reference in New Issue
Block a user