diff --git a/docs/docs/03-hooks/01-natural-language-processing/usePrivacyFilter.md b/docs/docs/03-hooks/01-natural-language-processing/usePrivacyFilter.md index 97495f510b..f7b6699e85 100644 --- a/docs/docs/03-hooks/01-natural-language-processing/usePrivacyFilter.md +++ b/docs/docs/03-hooks/01-natural-language-processing/usePrivacyFilter.md @@ -53,7 +53,7 @@ try { `usePrivacyFilter` takes [`PrivacyFilterProps`](../../06-api-reference/interfaces/PrivacyFilterProps.md) that consists of: -- `model` of type [`PrivacyFilterModelSources`](../../06-api-reference/interfaces/PrivacyFilterModelSources.md) containing the model source, tokenizer source, and BIOES label list. +- `model` of type [`PrivacyFilterModelSources`](../../06-api-reference/type-aliases/PrivacyFilterModelSources.md) — a built-in preset (`modelName` + `modelSource` + `tokenizerSource`). The label list is resolved from `modelName`, and built-in presets run with neutral, validity-only Viterbi decoding; for custom fine-tunes or custom Viterbi biases use [`PrivacyFilterModule.fromCustomModel`](../../04-typescript-api/01-natural-language-processing/PrivacyFilterModule.md) directly. - An optional flag [`preventLoad`](../../06-api-reference/interfaces/PrivacyFilterProps.md#preventload) which prevents auto-loading of the model. You need more details? Check the following resources: @@ -78,7 +78,7 @@ Token indices in returned entities are positions in the tokenizer's output (the ### Tuning precision and recall -Both built-in models ship with neutral, validity-only Viterbi decoding by default. If you want to shift the precision/recall tradeoff, pass an optional [`viterbiBiases`](../../06-api-reference/interfaces/PrivacyFilterModelSources.md#viterbibiases) object — six floats matching the operating-point schema in OpenAI's `viterbi_calibration.json`. Negative `backgroundToStart` makes the decoder enter spans more eagerly (higher recall); positive `backgroundStay` keeps it in the background label more often (higher precision). 
+Built-in presets run with neutral, validity-only Viterbi decoding. To shift the precision/recall tradeoff, load the model directly through [`PrivacyFilterModule.fromCustomModel`](../../04-typescript-api/01-natural-language-processing/PrivacyFilterModule.md) (reusing the preset's `modelSource` / `tokenizerSource` and label list) and pass a [`viterbiBiases`](../../06-api-reference/interfaces/ViterbiBiases.md) object — six floats matching the operating-point schema in OpenAI's `viterbi_calibration.json`. Negative `backgroundToStart` makes the decoder enter spans more eagerly (higher recall); positive `backgroundStay` keeps it in the background label more often (higher precision). ## Example diff --git a/docs/docs/04-typescript-api/01-natural-language-processing/PrivacyFilterModule.md b/docs/docs/04-typescript-api/01-natural-language-processing/PrivacyFilterModule.md index 2df0f7dff0..8a9b24d263 100644 --- a/docs/docs/04-typescript-api/01-natural-language-processing/PrivacyFilterModule.md +++ b/docs/docs/04-typescript-api/01-natural-language-processing/PrivacyFilterModule.md @@ -27,20 +27,18 @@ All methods of `PrivacyFilterModule` are explained in details here: [`PrivacyFil ## Loading the model -To create a ready-to-use instance, call the static [`fromModelName`](../../06-api-reference/classes/PrivacyFilterModule.md#frommodelname) factory with the following parameters: +To create a ready-to-use instance for a built-in preset, call the static [`fromModelName`](../../06-api-reference/classes/PrivacyFilterModule.md#frommodelname) factory with the following parameters: - `namedSources` — Object containing: - - `modelName` — Model name identifier. + - `modelName` — Built-in preset identifier (`'privacy-filter-openai'` or `'privacy-filter-nemotron'`). The runner resolves the matching BIOES label list from this value. - `modelSource` — Location of the `.pte` model binary. - `tokenizerSource` — Location of the `tokenizer.json` file. - - `labelNames` — BIOES label list. 
Index 0 must be `"O"`; the rest must follow the model's `id2label` mapping exactly. - - `viterbiBiases` (optional) — Six-field bias struct that shifts the decoder's precision/recall tradeoff. Defaults to neutral (validity-only Viterbi). - `onDownloadProgress` — Optional callback to track download progress (value between 0 and 1). The factory returns a promise that resolves to a loaded `PrivacyFilterModule` instance. -For custom-exported models, use [`fromCustomModel`](../../06-api-reference/classes/PrivacyFilterModule.md#fromcustommodel) instead — it takes the same fields as positional arguments and is convenient when you only have the raw resource locations. +For custom-exported models with a non-standard label space, or to pass `viterbiBiases` when loading a built-in preset's files, use [`fromCustomModel`](../../06-api-reference/classes/PrivacyFilterModule.md#fromcustommodel) instead. It takes `modelSource`, `tokenizerSource`, and a `labelNames` array (BIOES; index 0 must be `"O"`, the rest must follow the model's `id2label` mapping exactly), plus an optional `options` object with `viterbiBiases` (six-field bias struct that shifts the decoder's precision/recall tradeoff; defaults to neutral validity-only Viterbi) and `onDownloadProgress`. For more information on loading resources, take a look at [loading models](../../01-fundamentals/02-loading-models.md) page. 
diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts index 4dc8966aee..f02abd8e32 100644 --- a/packages/react-native-executorch/src/constants/modelUrls.ts +++ b/packages/react-native-executorch/src/constants/modelUrls.ts @@ -1,8 +1,4 @@ import { Platform } from 'react-native'; -import { - PRIVACY_FILTER_NEMOTRON_LABELS, - PRIVACY_FILTER_OPENAI_LABELS, -} from './privacyFilterLabels'; import { URL_PREFIX, PREVIOUS_VERSION_TAG } from './versions'; // LLMs @@ -1248,7 +1244,6 @@ export const PRIVACY_FILTER_OPENAI = { modelName: 'privacy-filter-openai', modelSource: `${URL_PREFIX}-privacy-filter-openai/${PREVIOUS_VERSION_TAG}/xnnpack/privacy_filter_openai_xnnpack_8da4w.pte`, tokenizerSource: `${URL_PREFIX}-privacy-filter-openai/${PREVIOUS_VERSION_TAG}/tokenizer.json`, - labelNames: PRIVACY_FILTER_OPENAI_LABELS, } as const; /** @@ -1261,7 +1256,6 @@ export const PRIVACY_FILTER_NEMOTRON = { modelName: 'privacy-filter-nemotron', modelSource: `${URL_PREFIX}-privacy-filter-nemotron/${PREVIOUS_VERSION_TAG}/xnnpack/privacy_filter_nemotron_xnnpack_8da4w.pte`, tokenizerSource: `${URL_PREFIX}-privacy-filter-nemotron/${PREVIOUS_VERSION_TAG}/tokenizer.json`, - labelNames: PRIVACY_FILTER_NEMOTRON_LABELS, } as const; // Image generation diff --git a/packages/react-native-executorch/src/constants/privacyFilterLabels.ts b/packages/react-native-executorch/src/constants/privacyFilterLabels.ts index e5a63d69ad..345889afdc 100644 --- a/packages/react-native-executorch/src/constants/privacyFilterLabels.ts +++ b/packages/react-native-executorch/src/constants/privacyFilterLabels.ts @@ -1,3 +1,5 @@ +import { PrivacyFilterModelName } from '../types/privacyFilter'; + // BIOES tag scheme: 1 outside ("O") + 4 prefix variants × N entity types. // These arrays must match the model's id2label mapping exactly — the runner // uses index = label id, and labels[0] must be "O". 
@@ -270,3 +272,17 @@ export const PRIVACY_FILTER_NEMOTRON_LABELS = [ 'E-vehicle_identifier', 'S-vehicle_identifier', ] as const; + +/** + * Per-model label list lookup for built-in privacy filter presets. The + * runner resolves the label space from `modelName`; custom fine-tunes + * bypass this map and pass their own list through + * `PrivacyFilterModule.fromCustomModel`. + */ +export const PRIVACY_FILTER_LABELS: Record< + PrivacyFilterModelName, + readonly string[] +> = { + 'privacy-filter-openai': PRIVACY_FILTER_OPENAI_LABELS, + 'privacy-filter-nemotron': PRIVACY_FILTER_NEMOTRON_LABELS, +}; diff --git a/packages/react-native-executorch/src/modules/natural_language_processing/PrivacyFilterModule.ts b/packages/react-native-executorch/src/modules/natural_language_processing/PrivacyFilterModule.ts index e58da59481..9b010f4c14 100644 --- a/packages/react-native-executorch/src/modules/natural_language_processing/PrivacyFilterModule.ts +++ b/packages/react-native-executorch/src/modules/natural_language_processing/PrivacyFilterModule.ts @@ -4,6 +4,7 @@ import { PrivacyFilterModelSources, ViterbiBiases, } from '../../types/privacyFilter'; +import { PRIVACY_FILTER_LABELS } from '../../constants/privacyFilterLabels'; import { ResourceFetcher } from '../../utils/ResourceFetcher'; import { BaseModule } from '../BaseModule'; import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; @@ -41,38 +42,26 @@ export class PrivacyFilterModule extends BaseModule { } /** - * Creates a Privacy Filter instance for a built-in or custom-shaped model. - * Pass one of the `PRIVACY_FILTER_*` constants from - * `react-native-executorch/constants` for a known-good config, or - * construct your own {@link PrivacyFilterModelSources} for a custom - * fine-tune. - * @param namedSources - Model + tokenizer resource locations and label list. + * Creates a Privacy Filter instance for a built-in preset. 
Pass one of + * the `PRIVACY_FILTER_*` constants from `react-native-executorch/constants` + * (or the matching `models.privacy_filter.*` accessor); the runner + * resolves the BIOES label list from `modelName`. For custom fine-tunes + * with a non-standard label space, use {@link fromCustomModel} instead. + * @param namedSources - Built-in model + tokenizer resource locations. * @param onDownloadProgress - Optional 0..1 download progress callback. * @returns A Promise resolving to a `PrivacyFilterModule` instance. */ - static async fromModelName( + static fromModelName( namedSources: PrivacyFilterModelSources, onDownloadProgress: (progress: number) => void = () => {} ): Promise { - try { - const [modelResult, tokenizerResult] = await Promise.all([ - ResourceFetcher.fetch(onDownloadProgress, namedSources.modelSource), - ResourceFetcher.fetch(undefined, namedSources.tokenizerSource), - ]); - const modelPath = modelResult?.[0]; - const tokenizerPath = tokenizerResult?.[0]; - if (!modelPath || !tokenizerPath) { - throw new RnExecutorchError(RnExecutorchErrorCode.DownloadInterrupted); - } - const labels = Array.from(namedSources.labelNames); - const biases = packViterbiBiases(namedSources.viterbiBiases); - return new PrivacyFilterModule( - await global.loadPrivacyFilter(modelPath, tokenizerPath, labels, biases) - ); - } catch (error) { - Logger.error('Load failed:', error); - throw parseUnknownError(error); - } + return PrivacyFilterModule.load( + namedSources.modelSource, + namedSources.tokenizerSource, + PRIVACY_FILTER_LABELS[namedSources.modelName], + undefined, + onDownloadProgress + ); } /** @@ -94,18 +83,43 @@ export class PrivacyFilterModule extends BaseModule { onDownloadProgress?: (progress: number) => void; } = {} ): Promise { - return PrivacyFilterModule.fromModelName( - { - modelName: 'custom', - modelSource, - tokenizerSource, - labelNames, - viterbiBiases: options.viterbiBiases, - }, + return PrivacyFilterModule.load( + modelSource, + tokenizerSource, + 
labelNames, + options.viterbiBiases, options.onDownloadProgress ?? (() => {}) ); } + private static async load( + modelSource: ResourceSource, + tokenizerSource: ResourceSource, + labelNames: readonly string[], + viterbiBiases: ViterbiBiases | undefined, + onDownloadProgress: (progress: number) => void + ): Promise { + try { + const [modelResult, tokenizerResult] = await Promise.all([ + ResourceFetcher.fetch(onDownloadProgress, modelSource), + ResourceFetcher.fetch(undefined, tokenizerSource), + ]); + const modelPath = modelResult?.[0]; + const tokenizerPath = tokenizerResult?.[0]; + if (!modelPath || !tokenizerPath) { + throw new RnExecutorchError(RnExecutorchErrorCode.DownloadInterrupted); + } + const labels = Array.from(labelNames); + const biases = packViterbiBiases(viterbiBiases); + return new PrivacyFilterModule( + await global.loadPrivacyFilter(modelPath, tokenizerPath, labels, biases) + ); + } catch (error) { + Logger.error('Load failed:', error); + throw parseUnknownError(error); + } + } + /** * Executes the model's forward pass to detect PII entity spans within the provided text. * @param text - The input text to scan for PII. diff --git a/packages/react-native-executorch/src/types/privacyFilter.ts b/packages/react-native-executorch/src/types/privacyFilter.ts index 9dc6ad6144..3e604af5f3 100644 --- a/packages/react-native-executorch/src/types/privacyFilter.ts +++ b/packages/react-native-executorch/src/types/privacyFilter.ts @@ -1,14 +1,6 @@ import { RnExecutorchError } from '../errors/errorUtils'; import { ResourceSource } from './common'; -/** - * Union of all built-in privacy filter model names. - * @category Types - */ -export type PrivacyFilterModelName = - | 'privacy-filter-openai' - | 'privacy-filter-nemotron'; - /** * Six Viterbi transition biases that match the operating-point schema * from the openai/privacy-filter `viterbi_calibration.json`. 
Each value @@ -35,30 +27,29 @@ export interface ViterbiBiases { } /** - * Bundle of resources needed to instantiate a privacy filter model. The - * built-in `PRIVACY_FILTER_OPENAI` / `PRIVACY_FILTER_NEMOTRON` constants - * conform to this shape; you can also build one yourself for a custom - * fine-tune as long as the label list matches the model's id2label. + * Per-model config for {@link PrivacyFilterModule.fromModelName}. Each + * built-in `modelName` resolves to its baked-in label list and is loaded + * with neutral Viterbi biases; custom fine-tunes or custom biases go through + * {@link PrivacyFilterModule.fromCustomModel} instead. * @category Types */ -export interface PrivacyFilterModelSources { - modelName: PrivacyFilterModelName | (string & {}); - modelSource: ResourceSource; - tokenizerSource: ResourceSource; - /** - * BIOES label list. Index 0 must be "O"; index i must equal the model's - * id2label[i]. The runner argmaxes over `labelNames.length` classes per - * token, so the size must match the model head exactly. - */ - labelNames: readonly string[]; - /** - * Optional Viterbi calibration. When present, biases are added during - * decoding to shift the precision/recall tradeoff. Defaults to all - * zeros (neutral) — same as the `default` operating point in OpenAI's - * `viterbi_calibration.json`. - */ - viterbiBiases?: ViterbiBiases; -} +export type PrivacyFilterModelSources = + | { + modelName: 'privacy-filter-openai'; + modelSource: ResourceSource; + tokenizerSource: ResourceSource; + } + | { + modelName: 'privacy-filter-nemotron'; + modelSource: ResourceSource; + tokenizerSource: ResourceSource; + }; + +/** + * Union of all built-in privacy filter model names. + * @category Types + */ +export type PrivacyFilterModelName = PrivacyFilterModelSources['modelName']; /** * A single detected PII entity span.