From 1583680d6f6eb34afff8704545082c5e2d3c4576 Mon Sep 17 00:00:00 2001 From: Julien Wajsberg Date: Mon, 12 Dec 2022 18:28:35 +0100 Subject: [PATCH 1/6] Make getMarkerSchemaName take the marker name and payload in two different arguments This is useful so that we can use this function from the raw marker table as well. --- src/components/marker-table/index.js | 6 ++++- src/components/tooltip/Marker.js | 6 ++++- src/profile-logic/marker-data.js | 10 ++++++-- src/profile-logic/marker-schema.js | 37 +++++++++++++++++----------- 4 files changed, 40 insertions(+), 19 deletions(-) diff --git a/src/components/marker-table/index.js b/src/components/marker-table/index.js index 6928167fd2..1f85812064 100644 --- a/src/components/marker-table/index.js +++ b/src/components/marker-table/index.js @@ -123,7 +123,11 @@ class MarkerTree { start: _formatStart(marker.start, this._zeroAt), duration, name, - type: getMarkerSchemaName(this._markerSchemaByName, marker), + type: getMarkerSchemaName( + this._markerSchemaByName, + marker.name, + marker.data + ), }; this._displayDataByIndex.set(markerIndex, displayData); } diff --git a/src/components/tooltip/Marker.js b/src/components/tooltip/Marker.js index 672a5cb391..acd35d5778 100644 --- a/src/components/tooltip/Marker.js +++ b/src/components/tooltip/Marker.js @@ -230,7 +230,11 @@ class MarkerTooltipContents extends React.PureComponent { if (data) { // Add the details for the markers based on their Marker schema. - const schema = getSchemaFromMarker(markerSchemaByName, marker); + const schema = getSchemaFromMarker( + markerSchemaByName, + marker.name, + marker.data + ); if (schema) { for (const schemaData of schema.data) { // Check for a schema that is looking up and formatting a value from diff --git a/src/profile-logic/marker-data.js b/src/profile-logic/marker-data.js index 3d31ed39aa..f61c1b20a6 100644 --- a/src/profile-logic/marker-data.js +++ b/src/profile-logic/marker-data.js @@ -161,7 +161,11 @@ export function getSearchFilteredMarkerIndexes( } // Now check the schema for the marker payload for searchable - const markerSchema = getSchemaFromMarker(markerSchemaByName, marker); + const markerSchema = getSchemaFromMarker( + markerSchemaByName, + marker.name, + marker.data + ); if ( markerSchema && markerPayloadMatchesSearch(markerSchema, marker, regExp) @@ -1362,7 +1366,9 @@ export function filterMarkerByDisplayLocation( return additionalResult; } - return markerTypes.has(getMarkerSchemaName(markerSchemaByName, marker)); + return markerTypes.has( + getMarkerSchemaName(markerSchemaByName, marker.name, marker.data) + ); }); } diff --git a/src/profile-logic/marker-schema.js b/src/profile-logic/marker-schema.js index f8c8d7c029..01c8d499f4 100644 --- a/src/profile-logic/marker-schema.js +++ b/src/profile-logic/marker-schema.js @@ -22,6 +22,7 @@ import type { MarkerSchemaByName, Marker, MarkerIndex, + MarkerPayload, } from 'firefox-profiler/types'; /** @@ -86,34 +87,33 @@ export const markerSchemaFrontEndOnly: MarkerSchema[] = [ */ export function getMarkerSchemaName( markerSchemaByName: MarkerSchemaByName, - marker: Marker + markerName: string, + markerData: MarkerPayload | null ): string { - const { data, name } = marker; - // Fall back to using the name if no payload exists. - - if (data) { - const { type } = data; - if (type === 'tracing' && data.category) { + if (markerData) { + const { type } = markerData; + if (type === 'tracing' && markerData.category) { // TODO - Tracing markers have a duplicate "category" field. // See issue #2749 // Does a marker schema for the "category" exist? - return markerSchemaByName[data.category] === undefined + return markerSchemaByName[markerData.category] === undefined ? // If not, default back to tracing 'tracing' : // If so, use the category as the schema name. - data.category; + markerData.category; } if (type === 'Text') { // Text markers are a cheap and easy way to create markers with // a category. Check for schema if it exists, if not, fallback to // a Text type marker. - return markerSchemaByName[name] === undefined ? 'Text' : name; + return markerSchemaByName[markerName] === undefined ? 'Text' : markerName; } - return data.type; + return markerData.type; } - return name; + // Fall back to using the name if no payload exists. + return markerName; } /** @@ -122,10 +122,13 @@ export function getMarkerSchemaName( */ export function getSchemaFromMarker( markerSchemaByName: MarkerSchemaByName, - marker: Marker + markerName: string, + markerData: MarkerPayload | null ): MarkerSchema | null { return ( - markerSchemaByName[getMarkerSchemaName(markerSchemaByName, marker)] || null + markerSchemaByName[ + getMarkerSchemaName(markerSchemaByName, markerName, markerData) + ] || null ); } @@ -349,7 +352,11 @@ export function getLabelGetter( // No label exists, it will have to be generated for the first time. if (label === undefined) { const marker = getMarker(markerIndex); - const schemaName = getMarkerSchemaName(markerSchemaByName, marker); + const schemaName = getMarkerSchemaName( + markerSchemaByName, + marker.name, + marker.data + ); const applyLabel = labelFns.get(schemaName); label = applyLabel From 04ca59a3595e868c9ccc7c7a01addc9025ddeba9 Mon Sep 17 00:00:00 2001 From: Julien Wajsberg Date: Wed, 14 Dec 2022 11:36:52 +0100 Subject: [PATCH 2/6] Isolate the marker operations relative to URLs in their own block --- src/profile-logic/sanitize.js | 57 +++++++++++++++-------------------- 1 file changed, 25 insertions(+), 32 deletions(-) diff --git a/src/profile-logic/sanitize.js b/src/profile-logic/sanitize.js index 7b0f93e42a..7c4c93c9c0 100644 --- a/src/profile-logic/sanitize.js +++ b/src/profile-logic/sanitize.js @@ -274,40 +274,33 @@ function sanitizeThreadPII( markerTable.data[i] = removePrefMarkerPreferenceValues(currentMarker); } - // Remove the all network URLs if user wants to remove them. - if ( - PIIToBeRemoved.shouldRemoveUrls && - currentMarker && - currentMarker.type === 'Network' - ) { - // Remove the URI fields from marker payload. - markerTable.data[i] = removeNetworkMarkerURLs(currentMarker); - - // Strip the URL from the marker name - const stringIndex = markerTable.name[i]; - stringArray[stringIndex] = stringArray[stringIndex].replace(/:.*/, ''); - } + if (currentMarker && PIIToBeRemoved.shouldRemoveUrls) { + // Remove the all network URLs if user wants to remove them. + if (currentMarker.type === 'Network') { + // Remove the URI fields from marker payload. + markerTable.data[i] = removeNetworkMarkerURLs(currentMarker); + + // Strip the URL from the marker name + const stringIndex = markerTable.name[i]; + stringArray[stringIndex] = stringArray[stringIndex].replace( + /:.*/, + '' + ); + } - // Remove the all OS paths from FileIO markers if user wants to remove them. - if ( - PIIToBeRemoved.shouldRemoveUrls && - currentMarker && - currentMarker.type === 'FileIO' - ) { - // Remove the filename path from marker payload. - markerTable.data[i] = sanitizeFileIOMarkerFilenamePath(currentMarker); - } + // Remove the all OS paths from FileIO markers if user wants to remove them. + if (currentMarker.type === 'FileIO') { + // Remove the filename path from marker payload. + markerTable.data[i] = sanitizeFileIOMarkerFilenamePath(currentMarker); + } - if ( - PIIToBeRemoved.shouldRemoveUrls && - currentMarker && - currentMarker.type === 'Text' - ) { - // Sanitize all the name fields of text markers in case they contain URLs. - markerTable.data[i] = sanitizeTextMarker(currentMarker); - // Re-assign the value of currentMarker as the marker may be - // sanitized again to remove extension ids. - currentMarker = markerTable.data[i]; + if (currentMarker.type === 'Text') { + // Sanitize all the name fields of text markers in case they contain URLs. + markerTable.data[i] = sanitizeTextMarker(currentMarker); + // Re-assign the value of currentMarker as the marker may be + // sanitized again to remove extension ids. + currentMarker = markerTable.data[i]; + } } if ( From 65cab038aae9f60c575a4c503d61d6ff7a27c740 Mon Sep 17 00:00:00 2001 From: Julien Wajsberg Date: Mon, 12 Dec 2022 18:36:48 +0100 Subject: [PATCH 3/6] Implement sanitization using the marker schema Fixes #2757 Fixes #4107 --- src/profile-logic/marker-data.js | 35 +++++++++++++++++++++++++++++++ src/profile-logic/sanitize.js | 29 +++++++++++++++++++++---- src/selectors/publish.js | 2 ++ src/test/unit/sanitize.test.js | 36 +++++++++++++++++++++++++++++++- 4 files changed, 97 insertions(+), 5 deletions(-) diff --git a/src/profile-logic/marker-data.js b/src/profile-logic/marker-data.js index f61c1b20a6..1bfa209f5b 100644 --- a/src/profile-logic/marker-data.js +++ b/src/profile-logic/marker-data.js @@ -1320,6 +1320,41 @@ export function sanitizeExtensionTextMarker( return payload; } +export function sanitizeFromMarkerSchema( + markerSchema: MarkerSchema, + markerPayload: MarkerPayload +): MarkerPayload { + for (const propertyDescription of markerSchema.data) { + if ( + propertyDescription.key !== undefined && + propertyDescription.format !== undefined + ) { + const key = propertyDescription.key; + const format = propertyDescription.format; + if (!(key in markerPayload)) { + continue; + } + + // We're typing the result of the sanitization with `any` because Flow + // doesn't like much our enormous enum of non-exact objects that's used as + // MarkerPayload type, and this code is too generic for Flow in this context. + if (format === 'url') { + markerPayload = ({ + ...markerPayload, + [key]: removeURLs(markerPayload[key]), + }: any); + } else if (format === 'file-path') { + markerPayload = ({ + ...markerPayload, + [key]: removeFilePath(markerPayload[key]), + }: any); + } + } + } + + return markerPayload; +} + /** * Markers can be filtered by display area using the marker schema. Get a list of * marker "types" (the type field in the Payload) for a specific location. diff --git a/src/profile-logic/sanitize.js b/src/profile-logic/sanitize.js index 7c4c93c9c0..01996584aa 100644 --- a/src/profile-logic/sanitize.js +++ b/src/profile-logic/sanitize.js @@ -18,7 +18,9 @@ import { filterRawMarkerTableToRangeWithMarkersToDelete, sanitizeExtensionTextMarker, sanitizeTextMarker, + sanitizeFromMarkerSchema, } from './marker-data'; +import { getSchemaFromMarker } from './marker-schema'; import { filterThreadSamplesToRange } from './profile-data'; import type { Profile, @@ -30,6 +32,7 @@ import type { IndexIntoFrameTable, IndexIntoFuncTable, InnerWindowID, + MarkerSchemaByName, } from 'firefox-profiler/types'; export type SanitizeProfileResult = {| @@ -47,7 +50,8 @@ export type SanitizeProfileResult = {| export function sanitizePII( profile: Profile, derivedMarkerInfoForAllThreads: DerivedMarkerInfo[], - maybePIIToBeRemoved: RemoveProfileInformation | null + maybePIIToBeRemoved: RemoveProfileInformation | null, + markerSchemaByName: MarkerSchemaByName ): SanitizeProfileResult { if (maybePIIToBeRemoved === null) { // Nothing is sanitized. @@ -133,7 +137,8 @@ export function sanitizePII( threadIndex, PIIToBeRemoved, windowIdFromPrivateBrowsing, - windowIdFromActiveTab + windowIdFromActiveTab, + markerSchemaByName ); // Filtering out the current thread if it's null. @@ -227,7 +232,8 @@ function sanitizeThreadPII( threadIndex: number, PIIToBeRemoved: RemoveProfileInformation, windowIdFromPrivateBrowsing: Set, - windowIdFromActiveTab: Set + windowIdFromActiveTab: Set, + markerSchemaByName: MarkerSchemaByName ): Thread | null { if (PIIToBeRemoved.shouldRemoveThreads.has(threadIndex)) { // If this is a hidden thread, remove the thread immediately. @@ -275,7 +281,22 @@ function sanitizeThreadPII( } if (currentMarker && PIIToBeRemoved.shouldRemoveUrls) { - // Remove the all network URLs if user wants to remove them. + // Use the schema to find some properties that need to be sanitized. + const markerNameIndex = markerTable.name[i]; + const markerName = thread.stringTable.getString(markerNameIndex); + const markerSchema = getSchemaFromMarker( + markerSchemaByName, + markerName, + currentMarker + ); + if (markerSchema) { + currentMarker = markerTable.data[i] = sanitizeFromMarkerSchema( + markerSchema, + currentMarker + ); + } + + // Remove the network URLs if user wants to remove them. if (currentMarker.type === 'Network') { // Remove the URI fields from marker payload. markerTable.data[i] = removeNetworkMarkerURLs(currentMarker); diff --git a/src/selectors/publish.js b/src/selectors/publish.js index 23d9ad0dd5..914b222679 100644 --- a/src/selectors/publish.js +++ b/src/selectors/publish.js @@ -16,6 +16,7 @@ import { getContainsPrivateBrowsingInformation, getThreads, getActiveTabID, + getMarkerSchemaByName, } from './profile'; import { compress } from '../utils/gz'; import { serializeProfile } from '../profile-logic/process-profile'; @@ -222,6 +223,7 @@ export const getSanitizedProfile: Selector = getProfile, getDerivedMarkerInfoForAllThreads, getRemoveProfileInformation, + getMarkerSchemaByName, sanitizePII ); diff --git a/src/test/unit/sanitize.test.js b/src/test/unit/sanitize.test.js index 87e6337769..899645748d 100644 --- a/src/test/unit/sanitize.test.js +++ b/src/test/unit/sanitize.test.js @@ -65,10 +65,44 @@ describe('sanitizePII', function () { } ); + const markerSchemaByName = { + FileIO: { + name: 'FileIO', + display: ['marker-chart', 'marker-table', 'timeline-fileio'], + data: [ + { + key: 'operation', + label: 'Operation', + format: 'string', + searchable: true, + }, + { + key: 'source', + label: 'Source', + format: 'string', + searchable: true, + }, + { + key: 'filename', + label: 'Filename', + format: 'file-path', + searchable: true, + }, + { + key: 'threadId', + label: 'Thread ID', + format: 'string', + searchable: true, + }, + ], + }, + }; + const sanitizedProfile = sanitizePII( originalProfile, derivedMarkerInfoForAllThreads, - PIIToRemove + PIIToRemove, + markerSchemaByName ).profile; return { From 2caeef66af7afc61ab6542e2505f771fdec6b7d5 Mon Sep 17 00:00:00 2001 From: Julien Wajsberg Date: Mon, 12 Dec 2022 18:35:02 +0100 Subject: [PATCH 4/6] Remove the special handling for FileIO markers as this is now handled by the marker schema generic handling --- src/profile-logic/marker-data.js | 17 ----------------- src/profile-logic/sanitize.js | 7 ------- 2 files changed, 24 deletions(-) diff --git a/src/profile-logic/marker-data.js b/src/profile-logic/marker-data.js index 1bfa209f5b..4ea8a869f6 100644 --- a/src/profile-logic/marker-data.js +++ b/src/profile-logic/marker-data.js @@ -35,7 +35,6 @@ import type { IPCMarkerPayload, NetworkPayload, PrefMarkerPayload, - FileIoPayload, TextMarkerPayload, StartEndRange, IndexedArray, @@ -1268,22 +1267,6 @@ export function removePrefMarkerPreferenceValues( return { ...payload, prefValue: '' }; } -/** - * Sanitize FileIO marker's filename property if it's non-empty. - */ -export function sanitizeFileIOMarkerFilenamePath( - payload: FileIoPayload -): FileIoPayload { - if (!payload.filename) { - return payload; - } - - return { - ...payload, - filename: removeFilePath(payload.filename), - }; -} - /** * Sanitize Text marker's name property for potential URLs. */ diff --git a/src/profile-logic/sanitize.js b/src/profile-logic/sanitize.js index 01996584aa..e19f85b770 100644 --- a/src/profile-logic/sanitize.js +++ b/src/profile-logic/sanitize.js @@ -14,7 +14,6 @@ import { removeURLs } from '../utils/string'; import { removeNetworkMarkerURLs, removePrefMarkerPreferenceValues, - sanitizeFileIOMarkerFilenamePath, filterRawMarkerTableToRangeWithMarkersToDelete, sanitizeExtensionTextMarker, sanitizeTextMarker, @@ -309,12 +308,6 @@ function sanitizeThreadPII( ); } - // Remove the all OS paths from FileIO markers if user wants to remove them. - if (currentMarker.type === 'FileIO') { - // Remove the filename path from marker payload. - markerTable.data[i] = sanitizeFileIOMarkerFilenamePath(currentMarker); - } - if (currentMarker.type === 'Text') { // Sanitize all the name fields of text markers in case they contain URLs. markerTable.data[i] = sanitizeTextMarker(currentMarker); From 4b37ed11d78d751edc9823dc90c085b4826387e0 Mon Sep 17 00:00:00 2001 From: Julien Wajsberg Date: Mon, 12 Dec 2022 18:35:20 +0100 Subject: [PATCH 5/6] Remove outdated comments now that this is implemented --- src/types/markers.js | 2 -- 1 file changed, 2 deletions(-) diff --git a/src/types/markers.js b/src/types/markers.js index e8c2921343..ab2982317b 100644 --- a/src/types/markers.js +++ b/src/types/markers.js @@ -20,9 +20,7 @@ export type MarkerFormatType = // String types. // Show the URL, and handle PII sanitization - // TODO Handle PII sanitization. Issue #2757 | 'url' - // TODO Handle PII sanitization. Issue #2757 // Show the file path, and handle PII sanitization. | 'file-path' // Important, do not put URL or file path information here, as it will not be From cde1a3ae3125102e3e18ec94f4b55279f8136430 Mon Sep 17 00:00:00 2001 From: Julien Wajsberg Date: Wed, 14 Dec 2022 11:01:58 +0100 Subject: [PATCH 6/6] Add a new test about sanitizing a marker with a Url payload --- src/test/unit/merge-compare.test.js | 2 +- src/test/unit/sanitize.test.js | 43 +++++++++++++++++++++++++++++ src/types/markers.js | 9 +++++- 3 files changed, 52 insertions(+), 2 deletions(-) diff --git a/src/test/unit/merge-compare.test.js b/src/test/unit/merge-compare.test.js index 57fd303484..a35983e0d3 100644 --- a/src/test/unit/merge-compare.test.js +++ b/src/test/unit/merge-compare.test.js @@ -481,7 +481,7 @@ describe('mergeThreads function', function () { // Check if we properly merged the string tables and have the correct url fields. const markerUrlsAfterMerge = mergedMarkers.data.map((markerData) => - markerData && 'url' in markerData && markerData.url + markerData && 'url' in markerData && typeof markerData.url === 'number' ? markerData.url : null ); diff --git a/src/test/unit/sanitize.test.js b/src/test/unit/sanitize.test.js index 899645748d..8265d53d18 100644 --- a/src/test/unit/sanitize.test.js +++ b/src/test/unit/sanitize.test.js @@ -96,6 +96,17 @@ describe('sanitizePII', function () { }, ], }, + Url: { + name: 'Url', + tableLabel: '{marker.name} - {marker.data.url}', + display: ['marker-chart', 'marker-table'], + data: [ + { + key: 'url', + format: 'url', + }, + ], + }, }; const sanitizedProfile = sanitizePII( @@ -582,6 +593,38 @@ describe('sanitizePII', function () { expect(marker2.filename).toBe('\\' + marker2File); }); + it('should sanitize the URL properties in markers', function () { + const { sanitizedProfile } = setup( + { + shouldRemoveUrls: true, + }, + getProfileWithMarkers([ + [ + 'SpeculativeConnect', + 1, + 2, + { + type: 'Url', + url: 'https://img-getpocket.cdn.mozilla.net', + }, + ], + ]) + ); + expect(sanitizedProfile.threads.length).toEqual(1); + const thread = sanitizedProfile.threads[0]; + expect(thread.markers.length).toEqual(1); + + const marker = thread.markers.data[0]; + + // The url fields should still be there + if (!marker || !marker.url) { + throw new Error('Failed to find url property in the payload'); + } + + // Now check the url fields and make sure they are sanitized. + expect(marker.url).toBe('https://'); + }); + it('should sanitize the eTLD+1 field if urls are supposed to be sanitized', function () { // Create a simple profile with eTLD+1 field in its thread. const { profile } = getProfileFromTextSamples('A'); diff --git a/src/types/markers.js b/src/types/markers.js index ab2982317b..2d9ee679a7 100644 --- a/src/types/markers.js +++ b/src/types/markers.js @@ -732,6 +732,11 @@ export type NoPayloadUserData = {| innerWindowID?: number, |}; +export type UrlMarkerPayload = {| + type: 'Url', + url: string, +|}; + /** * The union of all the different marker payloads that profiler.firefox.com knows about, * this is not guaranteed to be all the payloads that we actually get from the Gecko @@ -764,7 +769,8 @@ export type MarkerPayload = | MediaSampleMarkerPayload | JankPayload | BrowsertimeMarkerPayload - | NoPayloadUserData; + | NoPayloadUserData + | UrlMarkerPayload; export type MarkerPayload_Gecko = | GPUMarkerPayload @@ -786,6 +792,7 @@ export type MarkerPayload_Gecko = | IPCMarkerPayload_Gecko | MediaSampleMarkerPayload | NoPayloadUserData + | UrlMarkerPayload // The following payloads come in with a stack property. During the profile processing // the "stack" property is are converted into a "cause". See the CauseBacktrace type // for more information.