plinder-org
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎column_descriptions/posebusters_checks.tsv‎
Lines changed: 123 additions & 124 deletions b/‎column_descriptions/posebusters_checks.tsv‎
Lines changed: 123 additions & 124 deletions
diff --git a/‎docker-compose.yml‎
Lines changed: 1 addition & 1 deletion b/‎docker-compose.yml‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎docs/evaluation.md‎
Lines changed: 64 additions & 13 deletions b/‎docs/evaluation.md‎
Lines changed: 64 additions & 13 deletions
diff --git a/‎flows/docker.py‎
Lines changed: 1 addition & 1 deletion b/‎flows/docker.py‎
Lines changed: 1 addition & 1 deletion
diff --git a/‎pyproject.toml‎
Lines changed: 2 additions & 2 deletions b/‎pyproject.toml‎
Lines changed: 2 additions & 2 deletions
diff --git a/‎src/plinder/core/utils/config.py‎
Lines changed: 5 additions & 0 deletions b/‎src/plinder/core/utils/config.py‎
Lines changed: 5 additions & 0 deletions
diff --git a/‎src/plinder/eval/__init__.py‎
Lines changed: 14 additions & 0 deletions b/‎src/plinder/eval/__init__.py‎
Lines changed: 14 additions & 0 deletions
diff --git a/‎src/plinder/eval/docking/stratify_test_set.py‎
Lines changed: 6 additions & 6 deletions b/‎src/plinder/eval/docking/stratify_test_set.py‎
Lines changed: 6 additions & 6 deletions
diff --git a/‎src/plinder/eval/docking/write_scores.py‎
Lines changed: 2 additions & 0 deletions b/‎src/plinder/eval/docking/write_scores.py‎
Lines changed: 2 additions & 0 deletions
@@ -1,4 +1,5 @@
 reports/
+test_eval/
 tox_conda*
 
 # Byte-compiled / optimized / DLL files
 
@@ -26,4 +26,4 @@ services:
       - ./.coveragerc:/app/.coveragerc
       - ./reports/:/app/reports/
       - ./examples/:/app/examples/
-    command: /bin/bash -c "python -m pytest -v && cp .coverage reports/.coverage"
+    command: /bin/bash -c "python -m pytest -n auto -v && cp .coverage reports/.coverage"
@@ -6,6 +6,11 @@ sd_hide_title: true
 
 ## Evaluating docking poses across a stratified test set
 
+The `plinder.eval` subpackage allows (1) assessing protein-ligand complex predictions against reference `plinder` systems, and
+(2) correlating the performance of these predictions against the level of similarity of each test system to the corresponding training set.
+
+Running the scripts `src/plinder/eval/docking/write_scores.py` and `src/plinder/eval/docking/stratify_test_set.py` produces the same evaluation metrics as those shown on the public leaderboard.
+
 The `plinder-eval` package allows
 
 1. assessing protein-ligand complex predictions against reference `plinder` systems, and
@@ -26,17 +31,17 @@ leaderboard (coming soon).
 - `confidence`: Optional score associated with the pose
 - `ligand_file`: Path to the SDF file of the pose
 
-`split.csv` with `system_id` and `split` columns mapping PLINDER systems to `train`, or `test`.
+`split.parquet` with, at a minimum, `system_id` and `split` columns mapping PLINDER systems to `train` or `test`.
 
 ### Commands
 
 #### Write scores
 
 ```bash
-python src/plinder/eval/docking/write_scores.py --prediction_file predictions.csv --data_dir PLINDER_DATA_DIR --output_dir scores --num_processes 64
+plinder_eval --prediction_file tests/test_data/eval/predictions.csv --data_dir tests/test_data/eval --output_dir test_eval/ --num_processes 8
 ```
 
-This calculates accuracy metrics for all predicted poses compared to the reference. JSON files of each pose are stored in `scores/scores` and the summary file across all poses is stored in `scores.parquet`.
+This calculates accuracy metrics for all predicted poses compared to the reference. JSON files of each pose are stored in `test_eval/scores` and the summary file across all poses is stored in `test_eval/scores.parquet`.
 
 The predicted pose is compared to the reference system and the following ligand scores are calculated:
 
@@ -69,24 +74,70 @@ For oligomeric complexes:
 
 If `score_posebusters` is True, all posebusters checks are saved.
 
+You can inspect the results at `test_eval/scores.parquet`:
+
+```python
+>>> import pandas as pd
+>>> df = pd.read_parquet("test_eval/scores.parquet")
+>>> df.T
+                                                   0                      1
+model                              1a3b__1__1.B__1.D  1ai5__1__1.A_1.B__1.D
+reference                          1a3b__1__1.B__1.D  1ai5__1__1.A_1.B__1.D
+num_reference_ligands                              1                      1
+num_model_ligands                                  1                      1
+num_reference_proteins                             1                      2
+num_model_proteins                                 1                      2
+fraction_reference_ligands_mapped                1.0                    1.0
+fraction_model_ligands_mapped                    1.0                    1.0
+lddt_pli_ave                                0.889506               0.557841
+lddt_pli_wave                               0.889506               0.557841
+lddt_pli_amd_ave                             0.85815               0.510695
+lddt_pli_amd_wave                            0.85815               0.510695
+scrmsd_ave                                  1.617184               3.665143
+scrmsd_wave                                 1.617184               3.665143
+rank                                               1                      1
+```
+
 #### Write test stratification data
 
 (This command will not need to be run by a user, the `test_set.parquet` and `val_set.parquet` file will be provided with the split release)
 
 ```bash
-python src/plinder/eval/docking/stratify_test_set.py --split_file split.csv --data_dir PLINDER_DATA_DIR --output_dir test_data --num_processes 16
+plinder_stratify --split_file split.parquet --data_dir PLINDER_DATA_DIR --output_dir test_data
 ```
 
 Makes `test_data/test_set.parquet` which
 
 - Labels the maximum similarity of each test system to the training set across all the similarity metrics
-- Stratifies the test set based on training set similarity into `novel_pocket_pli`, `novel_pocket_ligand`, `novel_protein`, `novel_all`, and `not_novel`
-- Labels test systems with high quality
-
-#### Write evaluation results
-
-```bash
-python src/plinder/eval/docking/make_plots.py --score_file scores/scores.parquet --data_file test_data/test_set.parquet --output_dir results
+- Stratifies the test set based on training set similarity into `novel_pocket_pli`, `novel_ligand_pli`, `novel_protein`, `novel_ligand`, `novel_all` and `not_novel`
+- Labels test systems with high quality.
+
+To inspect the result of the run, do:
+```python
+>>> import pandas as pd
+>>> df = pd.read_parquet("test_data/test_set.parquet")
+>>> df.T
+                                                  0                      1
+system_id                         1a3b__1__1.B__1.D  1ai5__1__1.A_1.B__1.D
+pli_qcov                                        0.0                    0.0
+protein_seqsim_qcov_weighted_sum                0.0                    0.0
+protein_seqsim_weighted_sum                     0.0                    0.0
+protein_fident_qcov_weighted_sum                0.0                    0.0
+protein_fident_weighted_sum                     0.0                    0.0
+protein_lddt_qcov_weighted_sum                  0.0                    0.0
+protein_lddt_weighted_sum                       0.0                    0.0
+protein_qcov_weighted_sum                       0.0                    0.0
+pocket_fident_qcov                              0.0                    0.0
+pocket_fident                                   0.0                    0.0
+pocket_lddt_qcov                                0.0                    0.0
+pocket_lddt                                     0.0                    0.0
+pocket_qcov                                     0.0                    0.0
+tanimoto_similarity_max                         0.0                    0.0
+passes_quality                                False                  False
+novel_pocket_pli                               True                   True
+novel_pocket_ligand                            True                   True
+novel_protein                                  True                   True
+novel_all                                      True                   True
+not_novel                                     False                  False
+>>>
 ```
-
-Writes out results.csv and plots of performance as a function of training set similarity across different similarity metrics.
 
@@ -275,7 +275,7 @@ def test_image(
     cmd.append("test")
     if args is not None and len(args):
         cmd.extend(
-            split(f'''/bin/bash -c "python -m pytest -v {' '.join(args)} && cp .coverage reports/.coverage"''')
+            split(f'''/bin/bash -c "python -m pytest -n auto -v {' '.join(args)} && cp .coverage reports/.coverage"''')
         )
     Proc(cmd, env=env).execute()
     if push:
 
@@ -36,8 +36,8 @@ dependencies = [
 
 [project.scripts]
 plinder_download = "plinder.core.index.utils:download_plinder_cmd"
-plinder_eval = "plinder.eval.run:main"
-plinder_create_submission = "plinder.eval.create_submission:main"
+plinder_eval = "plinder.eval.docking.write_scores:main"
+plinder_stratify = "plinder.eval.docking.stratify_test_set:main"
 
 [project.optional-dependencies]
 lint = [
 
@@ -69,6 +69,11 @@ class _get_config:
     _packages: dict[str, set[str]] = {}
     _cfg = DictConfig({})
 
+    def _clear(self) -> None:
+        self._schema = {}
+        self._packages = {}
+        self._cfg = DictConfig({})
+
     def __call__(
         self,
         *,
 
@@ -1,2 +1,16 @@
 # Copyright (c) 2024, Plinder Development Team
 # Distributed under the terms of the Apache License 2.0
+from textwrap import dedent
+
+try:
+    import ost  # noqa
+except (ImportError, ModuleNotFoundError):
+    raise ImportError(
+        dedent(
+            """\
+            plinder.eval requires the OpenStructureToolkit >= 2.8.0 (ost) to be installed.
+            Please refer to the documentation for installation instructions and current limitations.
+            See the note here: https://github.com/plinder-org/plinder?tab=readme-ov-file#-getting-started
+            """
+        )
+    )
@@ -11,7 +11,7 @@
 
 from plinder.core.scores.protein import cross_similarity as protein_cross_similarity
 from plinder.core.utils.log import setup_logger
-from plinder.data.smallmolecules import mol2morgan_fp, tanimoto_maxsim_matrix
+from plinder.data import smallmolecules
 
 LOG = setup_logger(__name__)
 
@@ -96,14 +96,14 @@ def compute_ligand_max_similarities(
 ) -> None:
     if "fp" not in df.columns:
         smiles_fp_dict = {
-            smi: mol2morgan_fp(smi)
+            smi: smallmolecules.mol2morgan_fp(smi)
             for smi in df["ligand_rdkit_canonical_smiles"].drop_duplicates().to_list()
         }
         df["fp"] = df["ligand_rdkit_canonical_smiles"].map(smiles_fp_dict)
 
     df_test = df.loc[df["split"] == test_label][["system_id", "fp"]].copy()
 
-    df_test["tanimoto_similarity_max"] = tanimoto_maxsim_matrix(
+    df_test["tanimoto_similarity_max"] = smallmolecules.tanimoto_maxsim_matrix(
         df.loc[df["split"] == train_label]["fp"].to_list(),
         df_test["fp"].to_list(),
     )
@@ -359,9 +359,9 @@ def main() -> None:
     args = parser.parse_args()
 
     StratifiedTestSet.from_split(
-        split_file=args.split_file,
-        data_dir=args.data_dir,
-        output_dir=args.output_dir,
+        split_file=Path(args.split_file),
+        data_dir=Path(args.data_dir),
+        output_dir=Path(args.output_dir),
         train_label=args.train_label,
         test_label=args.test_label,
         overwrite=args.overwrite,
 
@@ -161,6 +161,8 @@ def extract_and_score_test_set(
     predictions = pd.read_csv(prediction_file)
     test_systems = set(predictions["reference_system_id"])
     system_dir = output_dir / "test_systems"
+    system_dir.mkdir(parents=True, exist_ok=True)
+    (output_dir / "scores").mkdir(parents=True, exist_ok=True)
     if not overwrite:
         test_systems = test_systems - set(x.name for x in system_dir.iterdir())
     system_dir.mkdir(exist_ok=True)
Original file line number	Diff line number	Diff line change
`@@ -1,4 +1,5 @@`
`1`	`1`	`reports/`
	`2`	`+test_eval/`
`2`	`3`	`tox_conda*`
`3`	`4`
`4`	`5`	`# Byte-compiled / optimized / DLL files`
Original file line number	Diff line number	Diff line change
`@@ -275,7 +275,7 @@ def test_image(`
`275`	`275`	`cmd.append("test")`
`276`	`276`	`if args is not None and len(args):`
`277`	`277`	`cmd.extend(`
`278`		`- split(f'''/bin/bash -c "python -m pytest -v {' '.join(args)} && cp .coverage reports/.coverage"''')`
	`278`	`+ split(f'''/bin/bash -c "python -m pytest -n auto -v {' '.join(args)} && cp .coverage reports/.coverage"''')`
`279`	`279`	`)`
`280`	`280`	`Proc(cmd, env=env).execute()`
`281`	`281`	`if push:`