This commit is contained in:
Rizky 2024-02-11 20:56:50 +07:00
parent 6c579b3866
commit e22fcb8015
5 changed files with 512 additions and 29 deletions

BIN
bun.lockb

Binary file not shown.

View File

@ -18,7 +18,6 @@
"radash": "^11.0.0",
"radix3": "^1.1.0",
"typescript": "^5.2.2",
"unzipper": "^0.10.14",
"fast-myers-diff": "^3.2.0"
"unzipper": "^0.10.14"
}
}

View File

@ -0,0 +1,416 @@
export type GenericIndexable<T> = { [key: number]: T; readonly length: number };
type TypedArray =
| Int8Array
| Int16Array
| Int32Array
| Uint8Array
| Uint16Array
| Uint32Array
| Float32Array
| Float64Array;
export type Indexable<T> = string | T[] | TypedArray | GenericIndexable<T>;
export interface Sliceable<T> extends GenericIndexable<T> {
slice(start: number, end?: number): this;
}
type Vec4 = [number, number, number, number];
type Vec3 = [number, number, number];
type Comparator = (i: number, j: number) => boolean;
type DiffState = {
i: number;
N: number;
j: number;
M: number;
Z: number;
b: TypedArray;
eq: (x: number, y: number) => boolean;
should_break?: () => boolean;
pxs: number;
pxe: number;
pys: number;
pye: number;
oxs: number;
oxe: number;
oys: number;
oye: number;
stack_top: number;
stack_base: number[];
};
// Find the list of differences between 2 lists by
// recursive subdivision, requring O(min(N,M)) space
// and O(min(N,M)*D) worst-case execution time where
// D is the number of differences.
function diff_internal(state: DiffState, c: number): number {
const { b, eq, stack_base } = state;
let { i, N, j, M, Z, stack_top } = state;
for (;;) {
switch (c) {
case 0: {
Z_block: while (N > 0 && M > 0) {
b.fill(0, 0, 2 * Z);
const W = N - M;
const L = N + M;
const parity = L & 1;
const offsetx = i + N - 1;
const offsety = j + M - 1;
const hmax = (L + parity) / 2;
let z: number;
h_loop: for (let h = 0; h <= hmax; h++) {
if (state.should_break && state.should_break()) {
return 0;
}
const kmin = 2 * Math.max(0, h - M) - h;
const kmax = h - 2 * Math.max(0, h - N);
// Forward pass
for (let k = kmin; k <= kmax; k += 2) {
const gkm = b[k - 1 - Z * Math.floor((k - 1) / Z)];
const gkp = b[k + 1 - Z * Math.floor((k + 1) / Z)];
const u = k === -h || (k !== h && gkm < gkp) ? gkp : gkm + 1;
const v = u - k;
let x = u;
let y = v;
while (x < N && y < M && eq(i + x, j + y)) x++, y++;
b[k - Z * Math.floor(k / Z)] = x;
if (
parity === 1 &&
(z = W - k) >= 1 - h &&
z < h &&
x + b[Z + z - Z * Math.floor(z / Z)] >= N
) {
if (h > 1 || x !== u) {
stack_base[stack_top++] = i + x;
stack_base[stack_top++] = N - x;
stack_base[stack_top++] = j + y;
stack_base[stack_top++] = M - y;
N = u;
M = v;
Z = 2 * (Math.min(N, M) + 1);
continue Z_block;
} else break h_loop;
}
}
// Reverse pass
for (let k = kmin; k <= kmax; k += 2) {
const pkm = b[Z + k - 1 - Z * Math.floor((k - 1) / Z)];
const pkp = b[Z + k + 1 - Z * Math.floor((k + 1) / Z)];
const u = k === -h || (k !== h && pkm < pkp) ? pkp : pkm + 1;
const v = u - k;
let x = u;
let y = v;
while (x < N && y < M && eq(offsetx - x, offsety - y)) x++, y++;
b[Z + k - Z * Math.floor(k / Z)] = x;
if (
parity === 0 &&
(z = W - k) >= -h &&
z <= h &&
x + b[z - Z * Math.floor(z / Z)] >= N
) {
if (h > 0 || x !== u) {
stack_base[stack_top++] = i + N - u;
stack_base[stack_top++] = u;
stack_base[stack_top++] = j + M - v;
stack_base[stack_top++] = v;
N = N - x;
M = M - y;
Z = 2 * (Math.min(N, M) + 1);
continue Z_block;
} else break h_loop;
}
}
}
if (N === M) continue;
if (M > N) {
i += N;
j += N;
M -= N;
N = 0;
} else {
i += M;
j += M;
N -= M;
M = 0;
}
// We already know either N or M is zero, so we can
// skip the extra check at the top of the loop.
break;
}
// yield delete_start, delete_end, insert_start, insert_end
// At this point, at least one of N & M is zero, or we
// wouldn't have gotten out of the preceding loop yet.
if (N + M !== 0) {
if (state.pxe === i || state.pye === j) {
// it is a contiguous difference extend the existing one
state.pxe = i + N;
state.pye = j + M;
} else {
const sx = state.pxs;
state.oxs = state.pxs;
state.oxe = state.pxe;
state.oys = state.pys;
state.oye = state.pye;
// Defer this one until we can check the next one
state.pxs = i;
state.pxe = i + N;
state.pys = j;
state.pye = j + M;
if (sx >= 0) {
state.i = i;
state.N = N;
state.j = j;
state.M = M;
state.Z = Z;
state.stack_top = stack_top;
return 1;
}
}
}
}
case 1: {
if (stack_top === 0) return 2;
M = stack_base[--stack_top];
j = stack_base[--stack_top];
N = stack_base[--stack_top];
i = stack_base[--stack_top];
Z = 2 * (Math.min(N, M) + 1);
c = 0;
}
}
}
}
class DiffGen implements IterableIterator<Vec4> {
private c = 0;
private result: IteratorResult<Vec4, undefined> = {
value: null as any,
done: false,
};
constructor(private state: DiffState) {}
[Symbol.iterator]() {
return this;
}
next() {
const { state, result } = this;
if (state.should_break && state.should_break()) {
result.done = true;
result.value = undefined;
return result;
}
if (this.c > 1) {
result.done = true;
result.value = undefined;
return result;
}
const c = diff_internal(state, this.c);
if (state.should_break && state.should_break()) {
result.done = true;
result.value = undefined;
return result;
}
this.c = c;
if (c === 1) {
result.value = [state.oxs, state.oxe, state.oys, state.oye];
return result;
}
if (state.pxs >= 0) {
result.value = [state.pxs, state.pxe, state.pys, state.pye];
return result;
}
result.done = true;
result.value = undefined;
return result;
}
}
export function diff_core(
i: number,
N: number,
j: number,
M: number,
eq: Comparator,
should_break?: () => boolean
): IterableIterator<Vec4> {
const Z = (Math.min(N, M) + 1) * 2;
const L = N + M;
const b = new (L < 256 ? Uint8Array : L < 65536 ? Uint16Array : Uint32Array)(
2 * Z
);
return new DiffGen({
i,
N,
j,
M,
Z,
b,
eq,
should_break,
pxs: -1,
pxe: -1,
pys: -1,
pye: -1,
oxs: -1,
oxe: -1,
oys: -1,
oye: -1,
stack_top: 0,
stack_base: [],
});
}
export function diff<T extends Indexable<unknown>>(
xs: T,
ys: T,
eq?: Comparator,
should_break?: () => boolean
): IterableIterator<Vec4> {
let [i, N, M] = [0, xs.length, ys.length];
if (typeof eq === "function") {
// eliminate common prefix
while (i < N && i < M && eq(i, i)) i++;
// check for equality
if (i === N && i === M) return [][Symbol.iterator]();
// eliminate common suffix
while (eq(--N, --M) && N > i && M > i);
} else {
// eliminate common prefix
while (i < N && i < M && xs[i] === ys[i]) i++;
// check for equality
if (i === N && i === M) return [][Symbol.iterator]();
// eliminate common suffix
while (xs[--N] === ys[--M] && N > i && M > i);
eq = (i, j) => xs[i] === ys[j];
}
return diff_core(i, N + 1 - i, i, M + 1 - i, eq, should_break);
}
class LCSGen implements IterableIterator<Vec3> {
private i = 0;
private j = 0;
constructor(
private diff: IterableIterator<Vec4>,
private N: number
) {}
[Symbol.iterator]() {
return this;
}
next() {
// Convert diffs into the dual similar-aligned representation.
// In each iteration, i and j will be aligned at the beginning
// of a shared section. This section is yielded, and i and j
// are re-aligned at the end of the succeeding unique sections.
const rec = this.diff.next();
if (rec.done) {
const { i, j, N } = this;
if (i < N) {
rec.done = false as any;
rec.value = [i, j, N - i] as any;
this.i = N;
}
return rec as IteratorResult<Vec3>;
}
const v = rec.value;
const sx = v[0];
const ex = v[1];
const ey = v[3];
const { i, j } = this;
if (i !== sx) {
v.length--; // re-use the vec4 as a vec3 to avoid allocation
v[0] = i;
v[1] = j;
v[2] = sx - i;
}
this.i = ex;
this.j = ey;
return rec as unknown as IteratorResult<Vec3>;
}
}
export function lcs<T extends Indexable<unknown>>(
xs: T,
ys: T,
eq?: Comparator
): IterableIterator<Vec3> {
return new LCSGen(diff(xs, ys, eq), xs.length);
}
export function* calcPatch<T, S extends Sliceable<T>>(
xs: S,
ys: S,
eq?: Comparator,
should_break?: () => boolean
): Generator<[number, number, S]> {
// Taking subarrays is cheaper than slicing for TypedArrays.
const slice = ArrayBuffer.isView(xs)
? (Uint8Array.prototype.subarray as unknown as typeof xs.slice)
: xs.slice;
for (const v of diff(xs, ys, eq, should_break)) {
v[2] = slice.call(ys, v[2], v[3]) as any;
yield v as any;
}
}
export function* applyPatch<T, S extends Sliceable<T>>(
xs: S,
patch: Iterable<[number, number, S]>
): Generator<S> {
let i = 0; // Taking subarrays is cheaper than slicing for TypedArrays.
const slice = ArrayBuffer.isView(xs)
? (Uint8Array.prototype.subarray as unknown as typeof xs.slice)
: xs.slice;
for (const [dels, dele, ins] of patch) {
if (i < dels) yield slice.call(xs, i, dels);
if (ins.length > 0) yield ins;
i = dele;
}
if (i < xs.length) yield slice.call(xs, i);
}
export function* calcSlices<T, S extends Sliceable<T>>(
xs: S,
ys: S,
eq?: Comparator
): Generator<[-1 | 0 | 1, S]> {
let i = 0; // Taking subarrays is cheaper than slicing for TypedArrays.
const slice = ArrayBuffer.isView(xs)
? (Uint8Array.prototype.subarray as unknown as typeof xs.slice)
: xs.slice;
for (const [dels, dele, inss, inse] of diff(xs, ys, eq)) {
if (i < dels) yield [0, slice.call(xs, i, dels)];
if (dels < dele) yield [-1, slice.call(xs, dels, dele)];
if (inss < inse) yield [1, slice.call(ys, inss, inse)];
i = dele;
}
if (i < xs.length) yield [0, xs.slice(i)];
}

View File

@ -1,16 +1,56 @@
import { describe, expect, test } from "bun:test";
import { Diff } from "./diff";
import { describe } from "bun:test";
import { readAsync } from "fs-jetpack";
import { Diff, gunzipAsync } from "./diff";
import { Packr } from "msgpackr";
const MAX_HISTORY = 10;
const packr = new Packr({});
describe("simple diff", async () => {
const server = await Diff.server("ini ionadi a");
const patch = server.getPatch("new");
const server = await Diff.server<any>(await readAsync("tsconfig.json"));
const patch = await server.getPatch("new");
const client = await Diff.client(patch);
console.log("init", hmn(patch.length));
const client = await Diff.client<string>(patch);
console.log("\npatch1");
await server.update("rako12");
const patch1 = await server.getPatch(client.ts);
await client.applyPatch(patch1);
console.log(
hmn(patch1.length),
`|| client: ${hmn((await client.data).length)}`
);
const newPatch = server.getPatch(client.ts);
console.log("\npatch2");
const bin = await readAsync("data/prod/main.js", "utf8");
await server.update(bin);
const patch2 = await server.getPatch(client.ts);
await client.applyPatch(patch2);
console.log(
hmn(patch2.length),
`|| client: ${hmn((await client.data).length)}`
);
await client.applyPatch(newPatch);
expect(await client.data).toBe("rako12");
console.log("\npatch3");
const ubin = "mantappu" + bin?.substring(0,100000);
await server.update(ubin);
const patch3 = await server.getPatch(client.ts);
await client.applyPatch(patch3);
console.log(
hmn(patch3.length),
`|| client: ${hmn((await client.data).length)}`
);
});
function hmn(bytes: number): string {
const sizes = ["bytes", "KB", "MB", "GB", "TB"];
if (bytes === 0) return "0 bytes";
const i = Math.floor(Math.log(bytes) / Math.log(1024));
const size = i === 0 ? bytes : (bytes / Math.pow(1024, i)).toFixed(2);
return `${size} ${sizes[i]}`;
}

View File

@ -1,12 +1,13 @@
import { applyPatch, calcPatch } from "fast-myers-diff";
import { applyPatch, calcPatch } from "./diff-internal";
import { Packr } from "msgpackr";
import { gunzip, gzip } from "zlib";
const MAX_HISTORY = 10;
const MAX_HISTORY = 10; // max history item
const DIFF_TIMEOUT = 50; // in ms
const packr = new Packr({});
type PATCH_RESULT<T> =
type PATCH_RESULT =
| {
mode: "new";
ts: string;
@ -22,7 +23,7 @@ export class Diff<T> {
ts = "";
mode: "client" | "server" = "server";
private _data: number[] = [];
_data: number[] = [];
private _history = {} as Record<string, number[]>;
constructor() {}
@ -58,20 +59,51 @@ export class Diff<T> {
}
}
getPatch(ts: "new" | string): PATCH_RESULT<T> {
getPatch(ts: "new" | string) {
return new Promise<Uint8Array>((done) => {
if (ts !== "new") {
const old_data = this._history[ts];
if (old_data) {
const result_diff = [...calcPatch(old_data, this._data)];
return { diff: result_diff, mode: "patch", ts: this.ts };
const now = performance.now();
const result_diff = [
...calcPatch(
old_data,
this._data,
(key1, key2) => {
return old_data[key1] === this._data[key2];
},
() => {
return performance.now() - now > DIFF_TIMEOUT;
}
),
];
if (performance.now() - now <= DIFF_TIMEOUT) {
done(
new Uint8Array(
packr.pack({ diff: result_diff, mode: "patch", ts: this.ts })
)
);
return;
}
}
}
return { data: this._data, mode: "new", ts: this.ts };
done(
new Uint8Array(
packr.pack({ data: this._data, mode: "new", ts: this.ts })
)
);
});
}
async applyPatch(patch: PATCH_RESULT<T>) {
async applyPatch(_patch: Uint8Array) {
const patch = packr.unpack(_patch) as PATCH_RESULT;
console.log(
patch.mode,
patch.mode === "new" ? patch.data.length : patch.diff.length
);
if (patch.mode === "new") {
this.ts = patch.ts;
if (patch.data) this._data = patch.data;
@ -97,14 +129,10 @@ export class Diff<T> {
return diff;
}
static async client<T>(patch: PATCH_RESULT<T>) {
static async client<T>(patch: Uint8Array) {
const diff = new Diff<T>();
diff.mode = "client";
if (patch.mode === "new") {
diff.ts = patch.ts;
if (patch.data) diff._data = patch.data;
}
diff.applyPatch(patch);
return diff;
}
}