diff --git a/packages/pglite/examples/dumpDataDir.html b/packages/pglite/examples/dumpDataDir.html new file mode 100644 index 000000000..f02cfc466 --- /dev/null +++ b/packages/pglite/examples/dumpDataDir.html @@ -0,0 +1 @@ + \ No newline at end of file diff --git a/packages/pglite/examples/dumpDataDir.js b/packages/pglite/examples/dumpDataDir.js new file mode 100644 index 000000000..35bdd52ee --- /dev/null +++ b/packages/pglite/examples/dumpDataDir.js @@ -0,0 +1,32 @@ +import { PGlite } from "../dist/index.js"; + +const pg = new PGlite(); +await pg.exec(` + CREATE TABLE IF NOT EXISTS test ( + id SERIAL PRIMARY KEY, + name TEXT + ); +`); +await pg.exec("INSERT INTO test (name) VALUES ('test');"); + +const file = await pg.dumpDataDir(); + +if (typeof window !== "undefined") { + // Download the dump + const url = URL.createObjectURL(file); + const a = document.createElement("a"); + a.href = url; + a.download = file.name; + a.click(); +} else { + // Save the dump to a file using node fs + const fs = await import("fs"); + fs.writeFileSync(file.name, await file.arrayBuffer()); +} + +const pg2 = new PGlite({ + loadDataDir: file, +}); + +const rows = await pg2.query("SELECT * FROM test;"); +console.log(rows); diff --git a/packages/pglite/src/definitions/tinytar.d.ts b/packages/pglite/src/definitions/tinytar.d.ts index 677f02556..90e4f7609 100644 --- a/packages/pglite/src/definitions/tinytar.d.ts +++ b/packages/pglite/src/definitions/tinytar.d.ts @@ -34,6 +34,8 @@ declare module "tinytar" { const NULL_CHAR: string; const TMAGIC: string; const OLDGNU_MAGIC: string; + + // Values used in typeflag field const REGTYPE: number; const LNKTYPE: number; const SYMTYPE: number; @@ -42,6 +44,8 @@ declare module "tinytar" { const DIRTYPE: number; const FIFOTYPE: number; const CONTTYPE: number; + + // Bits used in the mode field, values in octal const TSUID: number; const TSGID: number; const TSVTX: number; @@ -54,6 +58,7 @@ declare module "tinytar" { const TOREAD: number; const TOWRITE: number; const TOEXEC: number; + const TPERMALL: number; const TPERMMASK: number; } diff --git a/packages/pglite/src/fs/idbfs.ts b/packages/pglite/src/fs/idbfs.ts index 7a34fae1f..29fe13e47 100644 --- a/packages/pglite/src/fs/idbfs.ts +++ b/packages/pglite/src/fs/idbfs.ts @@ -1,6 +1,7 @@ import { FilesystemBase } from "./types.js"; import type { FS, PostgresMod } from "../postgres.js"; import { PGDATA } from "./index.js"; +import { dumpTar } from "./tarUtils.js"; export class IdbFs extends FilesystemBase { async emscriptenOpts(opts: Partial) { @@ -48,4 +49,8 @@ export class IdbFs extends FilesystemBase { }); }); } + + async dumpTar(mod: FS, dbname: string) { + return dumpTar(mod, dbname); + } } diff --git a/packages/pglite/src/fs/memoryfs.ts b/packages/pglite/src/fs/memoryfs.ts index ee7aa7a29..82b7ffdcf 100644 --- a/packages/pglite/src/fs/memoryfs.ts +++ b/packages/pglite/src/fs/memoryfs.ts @@ -1,9 +1,14 @@ import { FilesystemBase } from "./types.js"; -import type { PostgresMod } from "../postgres.js"; +import type { PostgresMod, FS } from "../postgres.js"; +import { dumpTar } from "./tarUtils.js"; export class MemoryFS extends FilesystemBase { async emscriptenOpts(opts: Partial) { // Nothing to do for memoryfs return opts; } + + async dumpTar(mod: FS, dbname: string) { + return dumpTar(mod, dbname); + } } diff --git a/packages/pglite/src/fs/nodefs.ts b/packages/pglite/src/fs/nodefs.ts index 2670f6139..8e846e5c7 100644 --- a/packages/pglite/src/fs/nodefs.ts +++ b/packages/pglite/src/fs/nodefs.ts @@ -2,7 +2,8 @@ import * as fs from "fs"; import * as path from "path"; import { FilesystemBase } from "./types.js"; import { PGDATA } from "./index.js"; -import type { PostgresMod } from "../postgres.js"; +import type { PostgresMod, FS } from "../postgres.js"; +import { dumpTar } from "./tarUtils.js"; export class NodeFS extends FilesystemBase { protected rootDir: string; @@ -29,4 +30,8 @@ export class NodeFS extends FilesystemBase { }; return options; } + + async dumpTar(mod: FS, dbname: string) { + return dumpTar(mod, dbname); + } } diff --git a/packages/pglite/src/fs/tarUtils.ts b/packages/pglite/src/fs/tarUtils.ts new file mode 100644 index 000000000..0c80d43e3 --- /dev/null +++ b/packages/pglite/src/fs/tarUtils.ts @@ -0,0 +1,203 @@ +import { tar, untar, type TarFile, REGTYPE, DIRTYPE } from "tinytar"; +import { FS } from "../postgres.js"; +import { PGDATA } from "./index.js"; + +export async function dumpTar(FS: FS, dbname?: string): Promise { + const tarball = createTarball(FS, PGDATA); + const [compressed, zipped] = await maybeZip(tarball); + const filename = (dbname || "pgdata") + (zipped ? ".tar.gz" : ".tar"); + return new File([compressed], filename, { + type: zipped ? "application/x-gtar" : "application/x-tar", + }); +} + +const compressedMimeTypes = [ + "application/x-gtar", + "application/x-tar+gzip", + "application/x-gzip", + "application/gzip", +]; + +export async function loadTar(FS: FS, file: File | Blob): Promise { + let tarball = new Uint8Array(await file.arrayBuffer()); + const filename = file instanceof File ? file.name : undefined; + const compressed = + compressedMimeTypes.includes(file.type) || + filename?.endsWith(".tgz") || + filename?.endsWith(".tar.gz"); + if (compressed) { + tarball = await unzip(tarball); + } + + const files = untar(tarball); + for (const file of files) { + const filePath = PGDATA + file.name; + + // Ensure the directory structure exists + const dirPath = filePath.split("/").slice(0, -1); + for (let i = 1; i <= dirPath.length; i++) { + const dir = dirPath.slice(0, i).join("/"); + if (!FS.analyzePath(dir).exists) { + FS.mkdir(dir); + } + } + + // Write the file or directory + if (file.type == REGTYPE) { + FS.writeFile(filePath, file.data); + FS.utime( + filePath, + dateToUnixTimestamp(file.modifyTime), + dateToUnixTimestamp(file.modifyTime), + ); + } else if (file.type == DIRTYPE) { + FS.mkdir(filePath); + } + } +} + +function readDirectory(FS: FS, path: string) { + let files: TarFile[] = []; + + const traverseDirectory = (currentPath: string) => { + const entries = FS.readdir(currentPath); + entries.forEach((entry) => { + if (entry === "." || entry === "..") { + return; + } + const fullPath = currentPath + "/" + entry; + const stats = FS.stat(fullPath); + const data = FS.isFile(stats.mode) + ? FS.readFile(fullPath, { encoding: "binary" }) + : new Uint8Array(0); + files.push({ + name: fullPath.substring(path.length), // remove the root path + mode: stats.mode, + size: stats.size, + type: FS.isFile(stats.mode) ? REGTYPE : DIRTYPE, + modifyTime: stats.mtime, + data, + }); + if (FS.isDir(stats.mode)) { + traverseDirectory(fullPath); + } + }); + }; + + traverseDirectory(path); + return files; +} + +export function createTarball(FS: FS, directoryPath: string) { + const files = readDirectory(FS, directoryPath); + const tarball = tar(files); + return tarball; +} + +export async function maybeZip( + file: Uint8Array, +): Promise<[Uint8Array, boolean]> { + if (typeof window !== "undefined" && "CompressionStream" in window) { + return [await zipBrowser(file), true]; + } else if ( + typeof process !== "undefined" && + process.versions && + process.versions.node + ) { + return [await zipNode(file), true]; + } else { + return [file, false]; + } +} + +export async function zipBrowser(file: Uint8Array): Promise { + const cs = new CompressionStream("gzip"); + const writer = cs.writable.getWriter(); + const reader = cs.readable.getReader(); + + writer.write(file); + writer.close(); + + const chunks: Uint8Array[] = []; + + while (true) { + const { value, done } = await reader.read(); + if (done) break; + if (value) chunks.push(value); + } + + const compressed = new Uint8Array( + chunks.reduce((acc, chunk) => acc + chunk.length, 0), + ); + let offset = 0; + chunks.forEach((chunk) => { + compressed.set(chunk, offset); + offset += chunk.length; + }); + + return compressed; +} + +export async function zipNode(file: Uint8Array): Promise { + const { promisify } = await import("util"); + const { gzip } = await import("zlib"); + const gzipPromise = promisify(gzip); + return await gzipPromise(file); +} + +export async function unzip(file: Uint8Array): Promise { + if (typeof window !== "undefined" && "DecompressionStream" in window) { + return await unzipBrowser(file); + } else if ( + typeof process !== "undefined" && + process.versions && + process.versions.node + ) { + return await unzipNode(file); + } else { + throw new Error("Unsupported environment for decompression"); + } +} + +export async function unzipBrowser(file: Uint8Array): Promise { + const ds = new DecompressionStream("gzip"); + const writer = ds.writable.getWriter(); + const reader = ds.readable.getReader(); + + writer.write(file); + writer.close(); + + const chunks: Uint8Array[] = []; + + while (true) { + const { value, done } = await reader.read(); + if (done) break; + if (value) chunks.push(value); + } + + const decompressed = new Uint8Array( + chunks.reduce((acc, chunk) => acc + chunk.length, 0), + ); + let offset = 0; + chunks.forEach((chunk) => { + decompressed.set(chunk, offset); + offset += chunk.length; + }); + + return decompressed; +} + +export async function unzipNode(file: Uint8Array): Promise { + const { promisify } = await import("util"); + const { gunzip } = await import("zlib"); + const gunzipPromise = promisify(gunzip); + return await gunzipPromise(file); +} + +function dateToUnixTimestamp(date: Date | number | undefined): number { + if (!date) { + return Math.floor(Date.now() / 1000); + } else { + return typeof date === "number" ? date : Math.floor(date.getTime() / 1000); + } +} diff --git a/packages/pglite/src/fs/types.ts b/packages/pglite/src/fs/types.ts index a9dcae5f7..ed09167d9 100644 --- a/packages/pglite/src/fs/types.ts +++ b/packages/pglite/src/fs/types.ts @@ -15,15 +15,17 @@ export interface Filesystem { /** * Sync the filesystem to the emscripten filesystem. */ - syncToFs(mod: FS): Promise; + syncToFs(FS: FS): Promise; /** * Sync the emscripten filesystem to the filesystem. */ - initialSyncFs(mod: FS): Promise; + initialSyncFs(FS: FS): Promise; - // on_mount(): Function; - // load_extension(ext: string): Promise; + /** + * Dump the PGDATA dir from the filesystem to a gziped tarball. + */ + dumpTar(FS: FS, dbname: string): Promise; } export abstract class FilesystemBase implements Filesystem { @@ -34,6 +36,7 @@ export abstract class FilesystemBase implements Filesystem { abstract emscriptenOpts( opts: Partial, ): Promise>; - async syncToFs(mod: FS) {} + async syncToFs(FS: FS) {} async initialSyncFs(mod: FS) {} + abstract dumpTar(mod: FS, dbname: string): Promise; } diff --git a/packages/pglite/src/interface.ts b/packages/pglite/src/interface.ts index 1eb895f15..15f3c544a 100644 --- a/packages/pglite/src/interface.ts +++ b/packages/pglite/src/interface.ts @@ -43,12 +43,19 @@ export type Extensions = { [namespace: string]: Extension | URL; }; +export interface DumpDataDirResult { + tarball: Uint8Array; + extension: ".tar" | ".tgz"; + filename: string; +} + export interface PGliteOptions { dataDir?: string; fs?: Filesystem; debug?: DebugLevel; relaxedDurability?: boolean; extensions?: Extensions; + loadDataDir?: Blob | File; } export type PGliteInterface = { @@ -83,6 +90,7 @@ export type PGliteInterface = { callback: (channel: string, payload: string) => void, ): () => void; offNotification(callback: (channel: string, payload: string) => void): void; + dumpDataDir(): Promise; }; export type PGliteInterfaceExtensions = E extends Extensions diff --git a/packages/pglite/src/pglite.ts b/packages/pglite/src/pglite.ts index 4d094d606..c184884fc 100644 --- a/packages/pglite/src/pglite.ts +++ b/packages/pglite/src/pglite.ts @@ -16,6 +16,7 @@ import type { Extensions, } from "./interface.js"; import { loadExtensionBundle, loadExtensions } from "./extensionUtils.js"; +import { loadTar } from "./fs/tarUtils.js"; import { PGDATA, WASM_PREFIX } from "./fs/index.js"; @@ -34,6 +35,8 @@ export class PGlite implements PGliteInterface { fs?: Filesystem; protected mod?: PostgresMod; + readonly dataDir?: string; + #ready = false; #closing = false; #closed = false; @@ -92,6 +95,7 @@ export class PGlite implements PGliteInterface { } else { options = dataDirOrPGliteOptions; } + this.dataDir = options.dataDir; // Enable debug logging if requested if (options?.debug !== undefined) { @@ -191,6 +195,17 @@ export class PGlite implements PGliteInterface { // Sync the filesystem from any previous store await this.fs!.initialSyncFs(this.mod.FS); + // If the user has provided a tarball to load the database from, do that now. + // We do this after the initial sync so that we can throw if the database + // already exists. + if (options.loadDataDir) { + if (this.mod.FS.analyzePath(PGDATA + "/PG_VERSION").exists) { + throw new Error("Database already exists, cannot load from tarball"); + } + this.#log("pglite: loading data from tarball"); + await loadTar(this.mod.FS, options.loadDataDir); + } + // Check and log if the database exists if (this.mod.FS.analyzePath(PGDATA + "/PG_VERSION").exists) { this.#log("pglite: found DB, resuming"); @@ -677,4 +692,12 @@ export class PGlite implements PGliteInterface { ): PGlite & PGliteInterfaceExtensions { return new PGlite(options) as any; } + + /** + * Dump the PGDATA dir from the filesystem to a gziped tarball. + */ + async dumpDataDir() { + let dbname = this.dataDir?.split("/").pop() ?? "pgdata"; + return this.fs!.dumpTar(this.mod!.FS, dbname); + } } diff --git a/packages/pglite/src/worker/index.ts b/packages/pglite/src/worker/index.ts index a2b5a9c8d..b69be732a 100644 --- a/packages/pglite/src/worker/index.ts +++ b/packages/pglite/src/worker/index.ts @@ -144,4 +144,8 @@ export class PGliteWorker implements PGliteInterface { queueMicrotask(() => listener(channel, payload)); } } + + async dumpDataDir() { + return this.#worker.dumpDataDir(); + } } diff --git a/packages/pglite/src/worker/process.ts b/packages/pglite/src/worker/process.ts index d9695b94f..cdaf91e6a 100644 --- a/packages/pglite/src/worker/process.ts +++ b/packages/pglite/src/worker/process.ts @@ -34,6 +34,10 @@ const worker = { async execProtocol(message: Uint8Array) { return await db.execProtocol(message); }, + async dumpDataDir() { + const file = await db.dumpDataDir(); + return Comlink.transfer(file, [await file.arrayBuffer()]); + }, }; Comlink.expose(worker); diff --git a/packages/pglite/tests/dump.test.js b/packages/pglite/tests/dump.test.js new file mode 100644 index 000000000..24eff1f44 --- /dev/null +++ b/packages/pglite/tests/dump.test.js @@ -0,0 +1,27 @@ +import test from "ava"; +import { PGlite } from "../dist/index.js"; + +test("dump data dir and load it", async (t) => { + const pg1 = new PGlite(); + await pg1.exec(` + CREATE TABLE IF NOT EXISTS test ( + id SERIAL PRIMARY KEY, + name TEXT + ); + `); + pg1.exec("INSERT INTO test (name) VALUES ('test');"); + + const ret1 = await pg1.query("SELECT * FROM test;"); + + const file = await pg1.dumpDataDir(); + + t.is(typeof file, "object"); + + const pg2 = new PGlite({ + loadDataDir: file, + }); + + const ret2 = await pg2.query("SELECT * FROM test;"); + + t.deepEqual(ret1, ret2); +});