Skip to content

Commit 7c224cc

Browse files
authored
feat: support the vfps pseudonym service (#59)
* feat: support for the vfps pseudonym service * test: improved code coverage
1 parent 91a8b6e commit 7c224cc

40 files changed

+2110
-630
lines changed

.checkov.yml

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
skip-check:
2+
- CKV_DOCKER_3
3+
- CKV_DOCKER_2
4+
# CKV_K8S_21: "The default namespace should not be used" - used for simple testing inside a KinD cluster
5+
- CKV_K8S_21
6+
# CKV_K8S_10: "CPU requests should be set" - ignored for iter8 job pod
7+
- CKV_K8S_10
8+
# CKV_K8S_11: "CPU limits should be set" - ignored for iter8 job pod
9+
- CKV_K8S_11
10+
# CKV_K8S_12: "Memory requests should be set" - ignored for iter8 job pod
11+
- CKV_K8S_12
12+
# CKV_K8S_13: "Memory limits should be set" - ignored for iter8 job pod
13+
- CKV_K8S_13
14+
# CKV_K8S_15: "Image Pull Policy should be Always" - ignored for digest-pinned iter8
15+
- CKV_K8S_15
16+
# CKV_K8S_12: "Memory requests should be set" - ignored for iter8
17+
- CKV_K8S_12
18+
# CKV_K8S_38: "Ensure that Service Account Tokens are only mounted where necessary" - necessary for iter8
19+
- CKV_K8S_38

.github/workflows/ci.yaml

Lines changed: 129 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ jobs:
1818
pull-requests: write
1919
outputs:
2020
image-tags: ${{ steps.container_meta.outputs.tags }}
21+
image-version: ${{ steps.container_meta.outputs.version }}
2122
steps:
2223
- name: Docker meta
2324
id: container_meta
@@ -112,14 +113,15 @@ jobs:
112113
env:
113114
IS_PULL_REQUEST: ${{ github.event_name == 'pull_request' }}
114115

115-
- name: Build and push
116-
uses: docker/build-push-action@v3
116+
- name: Build and push image
117+
uses: docker/build-push-action@c84f38281176d4c9cdb1626ffafcd6b3911b5d94 # tag=v3
117118
with:
118-
cache-from: type=gha
119-
cache-to: type=gha,mode=max
119+
load: ${{ github.event_name == 'pull_request' }}
120120
push: ${{ github.event_name != 'pull_request' }}
121121
tags: ${{ steps.container_meta.outputs.tags }}
122122
labels: ${{ steps.container_meta.outputs.labels }}
123+
cache-from: type=gha
124+
cache-to: type=gha,mode=max
123125
platforms: ${{ steps.platforms.outputs.platforms }}
124126

125127
- name: Add Coverage PR Comment
@@ -129,6 +131,129 @@ jobs:
129131
recreate: true
130132
path: code-coverage-results.md
131133

134+
- name: Save container image as tar archives
135+
if: ${{ github.event_name == 'pull_request' }}
136+
env:
137+
IMAGE: ${{ fromJson(steps.container_meta.outputs.json).tags[0] }}
138+
run: |
139+
docker save "$IMAGE" -o /tmp/image.tar
140+
141+
- name: Upload container image
142+
if: ${{ github.event_name == 'pull_request' }}
143+
uses: actions/upload-artifact@3cea5372237819ed00197afe530f5a7ea3e805c8 # tag=v3.1.0
144+
with:
145+
name: container-image
146+
path: |
147+
/tmp/image.tar
148+
149+
run-iter8-tests:
150+
name: run iter8 tests
151+
runs-on: ubuntu-22.04
152+
if: ${{ github.event_name == 'pull_request' }}
153+
needs:
154+
- build
155+
permissions:
156+
contents: read
157+
pull-requests: write
158+
steps:
159+
- name: Checkout
160+
uses: actions/checkout@2541b1294d2704b0964813337f33b291d3f8596b # tag=v3
161+
162+
- uses: iter8-tools/iter8@v0.11
163+
164+
- name: Create KinD cluster
165+
uses: helm/kind-action@9e8295d178de23cbfbd8fa16cf844eec1d773a07 # tag=v1.4.0
166+
with:
167+
cluster_name: kind
168+
169+
- name: Download container images
170+
uses: actions/download-artifact@fb598a63ae348fa914e94cd0ff38f362e927b741 # tag=v3.0.0
171+
with:
172+
name: container-image
173+
path: /tmp
174+
175+
- name: Load image into KinD
176+
run: |
177+
kind load image-archive /tmp/image.tar
178+
179+
- name: List images in cluster
180+
run: docker exec kind-control-plane crictl images
181+
182+
- name: Install the latest version of vfps as a pseudonymization service
183+
run: |
184+
helm repo add chgl https://chgl.github.io/charts
185+
helm install \
186+
--wait \
187+
--timeout=10m \
188+
vfps chgl/vfps
189+
190+
- name: Install "fhir-pseudonymizer"
191+
env:
192+
IMAGE_TAG: ${{ needs.build.outputs.image-version }}
193+
run: |
194+
helm repo add miracum https://miracum.github.io/charts
195+
helm install \
196+
--set="image.tag=${IMAGE_TAG}" \
197+
-f tests/iter8/values.yaml \
198+
--wait \
199+
--timeout=10m \
200+
fhir-pseudonymizer miracum/fhir-pseudonymizer
201+
202+
- name: Launch iter8 experiment
203+
run: kubectl apply -f tests/iter8/experiment.yaml
204+
205+
- name: Wait for experiment completion
206+
run: iter8 k assert -c completed --timeout 10m
207+
208+
- name: Assert no failures and SLOs are satisfied
209+
run: iter8 k assert -c nofailure,slos
210+
211+
- name: Create iter8 reports
212+
if: always()
213+
run: |
214+
iter8 k report | tee iter8-report.txt
215+
iter8 k report -o html > iter8-report.html
216+
217+
- name: Enhance iter8 report output for use as a PR comment
218+
run: |
219+
ITER8_REPORT_TXT=$(cat iter8-report.txt)
220+
{
221+
echo -e '---';
222+
echo -e '## iter8 report';
223+
echo -e '```console';
224+
echo -e "${ITER8_REPORT_TXT}";
225+
echo -e '```'
226+
} >> iter8-output.md
227+
228+
- name: Append sticky comment with iter8 report
229+
uses: marocchino/sticky-pull-request-comment@39c5b5dc7717447d0cba270cd115037d32d28443 # tag=v2.2.0
230+
if: ${{ github.event_name == 'pull_request' }}
231+
with:
232+
append: true
233+
path: iter8-output.md
234+
235+
- name: Upload report
236+
if: always()
237+
uses: actions/upload-artifact@3cea5372237819ed00197afe530f5a7ea3e805c8 # tag=v3.1.0
238+
with:
239+
name: iter8-report.html
240+
path: |
241+
iter8-report.html
242+
243+
- name: Print cluster and iter8 logs
244+
if: always()
245+
run: |
246+
kubectl cluster-info dump -o yaml | tee kind-cluster-dump.txt
247+
iter8 k log -l trace
248+
249+
- name: Upload cluster dump
250+
if: always()
251+
uses: actions/upload-artifact@3cea5372237819ed00197afe530f5a7ea3e805c8 # tag=v3.1.0
252+
with:
253+
name: kind-cluster-dump.txt
254+
path: |
255+
kind-cluster-dump.txt
256+
132257
release:
133258
needs: build
134259
name: Release

.github/workflows/mega-linter.yml

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -46,7 +46,7 @@ jobs:
4646
# Upload MegaLinter artifacts
4747
- name: Archive production artifacts
4848
if: ${{ always() }}
49-
uses: actions/upload-artifact@v2
49+
uses: actions/upload-artifact@3cea5372237819ed00197afe530f5a7ea3e805c8 # tag=v3.1.0
5050
with:
5151
name: MegaLinter reports
5252
path: |

.mega-linter.yml

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,3 +18,6 @@ FILEIO_REPORTER: false
1818

1919
BASH_SHFMT_ARGUMENTS:
2020
- "--indent=2"
21+
22+
REPOSITORY_TRIVY_ARGUMENTS:
23+
- "--severity='MEDIUM,HIGH,CRITICAL'"

.protolintrc.yml

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
lint:
2+
rules_option:
3+
max_line_length:
4+
max_chars: 120
5+
indent:
6+
not_insert_newline: true

.trivyignore

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,3 @@
1+
# iter8 requires access to secrets
2+
AVD-KSV-0041
3+
KSV041

Dockerfile

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,8 @@ RUN dotnet test \
2727
--configuration=Release \
2828
--collect:"XPlat Code Coverage" \
2929
--results-directory=./coverage \
30-
-l "console;verbosity=detailed"
30+
-l "console;verbosity=detailed" \
31+
--settings=runsettings.xml
3132

3233
FROM runtime
3334
COPY --from=build /build/publish/*anonymization.yaml /etc

README.md

Lines changed: 71 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,7 @@
44

55
> Send a FHIR® resource to `/fhir/$de-identify` and get it back anonymized and/or pseudonymized.
66
7-
Based on the brilliant [FHIR Tools for Anonymization](https://github.com/microsoft/FHIR-Tools-for-Anonymization/).
7+
Based on the brilliant [Tools for Health Data Anonymization](https://github.com/microsoft/Tools-for-Health-Data-Anonymization).
88

99
## Usage
1010

@@ -19,7 +19,7 @@ Container images are pushed to the following registries:
1919
- `quay.io/miracum/fhir-pseudonymizer:latest`
2020
- `harbor.miracum.org/miracum-etl/fhir-pseudonymizer:latest`
2121

22-
For deployment in Kubernetes see <https://github.com/miracum/charts/tree/master/charts/fhir-gateway> for a Helm Chart using the FHIR Pseudonymizer as one of its components.
22+
For deployment in Kubernetes see <https://github.com/miracum/charts/tree/master/charts/fhir-pseudonymizer> for a Helm Chart deploying the FHIR Pseudonymizer.
2323

2424
### API Endpoints
2525

@@ -29,11 +29,11 @@ An OpenAPI definition for the FHIR operation endpoints is available at `/swagger
2929

3030
#### `$de-identify`
3131

32-
The server provides a `/fhir/$de-identify` operation to de-identfiy received FHIR resources according to the configuration in the [anonymization.yaml](src/FhirPseudonymizer/anonymization.yaml) rules. See <https://github.com/microsoft/FHIR-Tools-for-Anonymization/> for more details on the anonymization rule configuration.
32+
The server provides a `/fhir/$de-identify` operation to de-identify received FHIR resources according to the configuration in the [anonymization.yaml](src/FhirPseudonymizer/anonymization.yaml) rules. See [Tools for Health Data Anonymization](https://github.com/microsoft/Tools-for-Health-Data-Anonymization) for more details on the anonymization rule configuration.
3333

3434
The service comes with a sample configuration file to help meet the requirements of HIPAA Safe Harbor Method (2)(i): [hipaa-anonymization.yaml](src/FhirPseudonymizer/hipaa-anonymization.yaml). This configuration can be used by setting `ANONYMIZATIONENGINECONFIGPATH=/etc/hipaa-anonymization.yaml`.
3535

36-
A new `pseudonymize` method was added to the default list of anonymization methods linked above. It uses [gPAS](https://www.ths-greifswald.de/en/researchers-general-public/gpas/) to create pseudonyms and replace the values in the resource with them.
36+
A new `pseudonymize` method was added to the default list of anonymization methods linked above. It uses either [gPAS](https://www.ths-greifswald.de/en/researchers-general-public/gpas/) or [Vfps](https://github.com/chgl/vfps) to create pseudonyms and replace the values in the resource with them.
3737
For example, the following rule replaces all identifiers of type `http://terminology.hl7.org/CodeSystem/v2-0203|MR` with a pseudonym generated in the `PATIENT` domain.
3838

3939
```yaml
@@ -45,7 +45,10 @@ fhirPathRules:
4545
4646
Note that if the `domain` setting is omitted, and an ID or reference is pseudonymized, then the resource name is used as the pseudonym domain. For example, pseudonymizing `"reference": "Patient/123"` will try to create a pseudonym for `123` in the `Patient` domain.
4747

48-
Note that all methods defined in [FHIR-Tools-for-Anonymization](https://github.com/microsoft/FHIR-Tools-for-Anonymization/) are supported. For example, to clamp a patient's birthdate if they were born before January 1st 1931 to 01/01/1930, use:
48+
When using [Vfps](https://github.com/chgl/vfps), the `domain` setting can alternatively be specified as `namespace`.
49+
50+
Note that all methods defined in [Tools for Health Data Anonymization](https://github.com/microsoft/Tools-for-Health-Data-Anonymization) are supported.
51+
For example, to clamp a patient's birthdate if they were born before January 1st 1931 to 01/01/1930, use:
4952

5053
```yaml
5154
fhirPathRules:
@@ -77,16 +80,35 @@ Additionally, there are some optional configuration values that can be set as en
7780

7881
| Environment Variable | Description | Default |
7982
| --------------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ | --------------------------- |
80-
| `gPAS__Url` | The gPAS TTP FHIR Gateway URL. Only required if any of the anonymization.yaml rules use the `pseudonymize` method. | `""` |
81-
| `gPAS__Auth__Basic__Username` | The HTTP basic auth username to connect to gPAS | `""` |
82-
| `gPAS__Auth__Basic__Password` | The HTTP basic auth password to connect to gPAS | `""` |
8383
| `AnonymizationEngineConfigPath` | Path to the `anonymization.yaml` that contains the rules to transform the resources. | `"/etc/anonymization.yaml"` |
8484
| `ApiKey` | Key that must be set in the `X-Api-Key` header to allow requests to protected endpoints. | `""` |
85-
| `gPAS__Version` | Version of gPAS to support. There were breaking changes to the FHIR API in 1.10.2 and 1.10.3, so explicitely set this value if you are using a later version than 1.10.1. | `"1.10.1"` |
8685
| `UseSystemTextJsonFhirSerializer` | Enable the new `System.Text.Json`-based FHIR serializer to significantly [improve throughput and latencies](#usesystemtextjsonfhirserializer). See <https://github.com/FirelyTeam/firely-net-sdk/releases/tag/v4.0.0-r4> | `false` |
86+
| `PseudonymizationService` | The type of pseudonymization service to use. Can be one of `gPAS`, `Vfps`, `None` | `"gPAS"` |
8787

8888
See [appsettings.json](src/FhirPseudonymizer/appsettings.json) for additional options.
8989

90+
The application supports pseudonymization using either [gPAS](https://www.ths-greifswald.de/forscher/gpas/) or [Vfps](https://github.com/chgl/vfps) which can be configured via the `PseudonymizationService` setting.
91+
Service-specific configuration settings are listed below.
92+
93+
### gPAS
94+
95+
| Environment Variable | Description | Default |
96+
| ----------------------------- | ------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ---------- |
97+
| `gPAS__Url` | The gPAS TTP FHIR Gateway URL. Only required if any of the anonymization.yaml rules use the `pseudonymize` method. | `""` |
98+
| `gPAS__Auth__Basic__Username` | The HTTP basic auth username to connect to gPAS | `""` |
99+
| `gPAS__Auth__Basic__Password` | The HTTP basic auth password to connect to gPAS | `""` |
100+
| `gPAS__Version` | Version of gPAS to support. There were breaking changes to the FHIR API in 1.10.2 and 1.10.3, so explicitly set this value if you are using a later version than 1.10.1. | `"1.10.1"` |
101+
102+
### Vfps
103+
104+
| Environment Variable | Description | Default |
105+
| ----------------------------------------------- | ---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- | ------- |
106+
| `Vfps__Address` | The Vfps service address. Use `dns:///` scheme for client-side load-balancing. | `""` |
107+
| `Vfps__UnsafeUseInsecureChannelCallCredentials` | If set to `true`, `CallCredentials` are applied to gRPC calls made by an insecure channel. Sending authentication headers over an insecure connection has security implications and shouldn't be done in production environments. | `true` |
108+
| `Vfps__UseTls` | If set to `true`, creates client-side SSL credentials loaded from the file pointed to by the `GRPC_DEFAULT_SSL_ROOTS_FILE_PATH` environment variable. If that fails, gets the root certificates from a well-known place on disk. | `false` |
109+
| `Vfps__Auth__Basic__Username` | The HTTP basic auth username to connect to the Vfps service. Used in the `Authorization: Basic` metadata header value for the gRPC calls. | `""` |
110+
| `Vfps__Auth__Basic__Password` | The HTTP basic auth password to connect to the Vfps service. | `""` |
111+
90112
## Dynamic rule settings
91113

92114
Anonymization and pseudonymization rules in the `anonymization.yaml` config file can be overridden and/or extended on a per request basis.
@@ -217,6 +239,44 @@ pre-commit install
217239
pre-commit install --hook-type commit-msg
218240
```
219241

242+
### Run iter8 SLO experiments locally
243+
244+
```sh
245+
kind create cluster
246+
247+
export IMAGE_TAG="iter8-test"
248+
249+
docker build -t ghcr.io/miracum/fhir-pseudonymizer:${IMAGE_TAG} .
250+
251+
kind load docker-image ghcr.io/miracum/fhir-pseudonymizer:${IMAGE_TAG}
252+
253+
helm repo add chgl https://chgl.github.io/charts
254+
helm repo add miracum https://miracum.github.io/charts
255+
helm repo update
256+
257+
helm install \
258+
--wait \
259+
--timeout=10m \
260+
vfps chgl/vfps
261+
262+
helm upgrade --install \
263+
--set="image.tag=${IMAGE_TAG}" \
264+
-f tests/iter8/values.yaml \
265+
--wait \
266+
--timeout=10m \
267+
fhir-pseudonymizer miracum/fhir-pseudonymizer
268+
269+
kubectl apply -f tests/iter8/experiment.yaml
270+
271+
iter8 k assert -c completed --timeout 15m
272+
iter8 k assert -c nofailure,slos
273+
iter8 k report
274+
275+
# to restart:
276+
kubectl delete job default-1-job
277+
kubectl apply -f tests/iter8/experiment.yaml
278+
```
279+
220280
## Benchmark
221281

222282
> **Note**
@@ -300,8 +360,8 @@ cosign verify --key https://miracum.github.io/cosign.pub ghcr.io/miracum/fhir-ps
300360
## Semantic versioning exclusion policies
301361

302362
The project's versioning follows the [SemVer](https://semver.org/) convention.
303-
However, we exclude metrics (ie. anything under the `/metrics` endpoint), traces,
304-
and the contents of the container image from this. Alwas be prepared to double-check the release notes before updating.
363+
However, we exclude metrics (i.e. anything under the `/metrics` endpoint), traces, and the contents of the container image from this.
364+
Always be prepared to double-check the release notes before updating.
305365

306366
## Attribution
307367

benchmark/bombardier.sh

Lines changed: 6 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -3,8 +3,9 @@
33
RESOURCE_PATH=${RESOURCE_PATH:-bundle.json}
44

55
bombardier -f "${RESOURCE_PATH}" \
6-
-H "Content-Type:application/fhir+json" \
7-
-m POST \
8-
-d 60s \
9-
-l \
10-
"http://localhost:5000/fhir/\$de-identify"
6+
--timeout=10s \
7+
-H "Content-Type:application/fhir+json" \
8+
-m POST \
9+
-d 60s \
10+
-l \
11+
"http://localhost:5000/fhir/\$de-identify"

0 commit comments

Comments
 (0)