Skip to content
Merged
Changes from 1 commit
Commits
Show all changes
51 commits
Select commit Hold shift + click to select a range
43d6007
Migrate validation to Protocol._validate
IAlibay Dec 5, 2025
2cd56ba
some fixes
IAlibay Dec 5, 2025
70e6d7a
Merge branch 'main' into validate-rfe
IAlibay Dec 5, 2025
f330562
move some things around
IAlibay Dec 8, 2025
95b92b3
Merge branch 'main' into validate-rfe
IAlibay Dec 15, 2025
1e0153e
add validate endstate tests
IAlibay Dec 15, 2025
fe2b879
Merge branch 'validate-rfe' of github.com:OpenFreeEnergy/openfe into …
IAlibay Dec 15, 2025
fbc4554
validate mapping tests
IAlibay Dec 15, 2025
c2f49d2
net charge validation tests
IAlibay Dec 15, 2025
c50f99c
more stuff
IAlibay Dec 22, 2025
9e0d29b
remove old tests
IAlibay Dec 24, 2025
2fe8ff9
make hybrid samplers not rely on htf
IAlibay Dec 24, 2025
4a0bd26
fix up test
IAlibay Dec 24, 2025
5848adc
fix up some slow tests
IAlibay Dec 24, 2025
1aaef87
Merge branch 'main' into multistate-nohtf
IAlibay Dec 24, 2025
b6d5ecd
Fix up the one test
IAlibay Dec 26, 2025
0605d11
fix a few things
IAlibay Dec 26, 2025
48106a2
fix the remaining tests
IAlibay Dec 26, 2025
5af66e8
cleanup imports
IAlibay Dec 26, 2025
ad0b5fb
Merge branch 'validate-rfe' into move-rfe-protocol
IAlibay Dec 26, 2025
45e004c
Merge branch 'multistate-nohtf' into move-rfe-protocol
IAlibay Dec 26, 2025
58dd71c
Migrate protocol, units, and results for the hybridtop protocol
IAlibay Dec 26, 2025
792996e
Add news item
IAlibay Dec 26, 2025
91f1788
Merge branch 'validate-rfe' into move-rfe-protocol
IAlibay Dec 26, 2025
527b870
Merge branch 'main' into validate-rfe
IAlibay Dec 26, 2025
7d17998
fix redefine
IAlibay Dec 27, 2025
43eb947
start modularising everything
IAlibay Dec 27, 2025
d1bd736
Add charge validation for smcs when dealing with ismorphic molecules
IAlibay Dec 27, 2025
51a6de1
break down the rfe units into bits
IAlibay Dec 29, 2025
6a5a76a
more broadly disallow oechem as a backend when creating systems
IAlibay Dec 29, 2025
cdd3da0
fix issue with nc being undefined
IAlibay Dec 29, 2025
e0a8e2a
Merge branch 'validate-rfe' into move-rfe-protocol
IAlibay Dec 29, 2025
a0ef737
Merge branch 'move-rfe-protocol' into breakdown-rfe-protocolunit
IAlibay Dec 29, 2025
b826803
Fix missing import
IAlibay Dec 29, 2025
42ddbcf
Merge branch 'move-rfe-protocol' into breakdown-rfe-protocolunit
IAlibay Dec 29, 2025
063e8ce
Fix comp getter
IAlibay Dec 29, 2025
3844bb5
Merge branch 'move-rfe-protocol' into breakdown-rfe-protocolunit
IAlibay Dec 29, 2025
a98c799
update module name
IAlibay Dec 30, 2025
5d0bc7e
Merge branch 'move-rfe-protocol' into breakdown-rfe-protocolunit
IAlibay Dec 30, 2025
7c915ed
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 3, 2026
951ac15
move a few things around to make life easier
IAlibay Jan 3, 2026
b9f8264
Merge branch 'main' into breakdown-rfe-protocolunit
IAlibay Jan 7, 2026
2e4b455
fix typo
IAlibay Jan 7, 2026
7182805
fix some merge issues
IAlibay Jan 7, 2026
28b4381
fix test failures due to integrator checks
IAlibay Jan 7, 2026
726f517
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jan 7, 2026
1587673
try to make mypy happy
IAlibay Jan 7, 2026
5cca950
Merge branch 'breakdown-rfe-protocolunit' of github.com:OpenFreeEnerg…
IAlibay Jan 7, 2026
1fbec7d
add early exist if there's no molecules
IAlibay Jan 7, 2026
3cd758e
Apply suggestions from code review
IAlibay Jan 7, 2026
6622428
Update openfe/protocols/openmm_rfe/hybridtop_units.py
IAlibay Jan 7, 2026
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
start modularising everything
  • Loading branch information
IAlibay committed Dec 27, 2025
commit 43eb947872896f350c694d714ced77789b495b0b
307 changes: 253 additions & 54 deletions openfe/protocols/openmm_rfe/hybridtop_units.py
Original file line number Diff line number Diff line change
Expand Up @@ -111,25 +111,132 @@ def __init__(
generation=generation,
)

def _prepare(
    self,
    verbose: bool,
    scratch_basepath: pathlib.Path | None,
    shared_basepath: pathlib.Path | None,
):
    """
    Store the verbosity flag and resolve the two working directories.

    Parameters
    ----------
    verbose : bool
        Stored on ``self.verbose``. When true, a start-up message is
        emitted through ``self.logger`` at the INFO level.
    scratch_basepath : pathlib.Path | None
        Base path for scratch files. ``None`` is replaced by
        ``pathlib.Path(".")``.
    shared_basepath : pathlib.Path | None
        Base path for shared files. ``None`` is replaced by
        ``pathlib.Path(".")``.
    """
    self.verbose = verbose

    if self.verbose:
        self.logger.info("Setting up the hybrid topology simulation")

    # A missing base path falls back to the current working directory.
    self.scratch_basepath = (
        pathlib.Path(".") if scratch_basepath is None else scratch_basepath
    )
    self.shared_basepath = (
        pathlib.Path(".") if shared_basepath is None else shared_basepath
    )

@staticmethod
def _get_settings(
    settings: RelativeHybridTopologyProtocolSettings
) -> dict[str, SettingsBaseModel]:
    """
    Collect the individual settings objects of the Protocol in a dict.

    Parameters
    ----------
    settings : RelativeHybridTopologyProtocolSettings
        The Protocol settings to read the entries from.

    Returns
    -------
    protocol_settings : dict[str, SettingsBaseModel]
        One entry per settings group. Every key matches the attribute it
        was read from, except ``"charge_settings"`` which holds
        ``settings.partial_charge_settings``.

    Notes
    -----
    A dict is returned so that behaviour can be duck typed between phases.
    For example subclasses may contain both `solvent` and `complex`
    settings; with this approach the relevant entry can be stored under
    the same key and handed to other methods unchanged.
    """
    return {
        "forcefield_settings": settings.forcefield_settings,
        "thermo_settings": settings.thermo_settings,
        "alchemical_settings": settings.alchemical_settings,
        "lambda_settings": settings.lambda_settings,
        "charge_settings": settings.partial_charge_settings,
        "solvation_settings": settings.solvation_settings,
        "simulation_settings": settings.simulation_settings,
        "output_settings": settings.output_settings,
        "integrator_settings": settings.integrator_settings,
        "engine_settings": settings.engine_settings,
    }

@staticmethod
def _get_components(
    stateA: ChemicalSystem,
    stateB: ChemicalSystem
) -> tuple[
    dict[str, Component],
    SolventComponent | None,
    ProteinComponent | None,
    dict[SmallMoleculeComponent, OFFMolecule]
]:
    """
    Get the components from the ChemicalSystem inputs.

    Parameters
    ----------
    stateA : ChemicalSystem
        ChemicalSystem defining the state A components.
    stateB : ChemicalSystem
        ChemicalSystem defining the state B components.

    Returns
    -------
    alchem_comps : dict[str, Component]
        Dictionary of alchemical components.
    solvent_comp : SolventComponent | None
        The solvent component, read from state A only.
        NOTE(review): annotated as optional to match how the methods
        consuming it type the value - confirm against
        ``system_validation.get_components``.
    protein_comp : ProteinComponent | None
        The protein component, read from state A only (same note as for
        ``solvent_comp``).
    small_mols : dict[SmallMoleculeComponent, openff.toolkit.Molecule]
        Every small molecule component found in either state, paired
        with a freshly converted OpenFF Molecule.
    """
    alchem_comps = system_validation.get_alchemical_components(stateA, stateB)

    solvent_comp, protein_comp, smcs_A = system_validation.get_components(stateA)
    # Only the small molecules are needed from state B.
    _, _, smcs_B = system_validation.get_components(stateB)

    # dict.fromkeys de-duplicates like a set does, but keeps first-seen
    # order so the resulting dictionary is ordered the same on every run.
    small_mols = {
        m: m.to_openff()
        for m in dict.fromkeys([*smcs_A, *smcs_B])
    }

    return alchem_comps, solvent_comp, protein_comp, small_mols

@staticmethod
def _assign_partial_charges(
charge_settings: OpenFFPartialChargeSettings,
off_small_mols: dict[str, list[tuple[SmallMoleculeComponent, OFFMolecule]]],
small_mols: dict[SmallMoleculeComponent, OFFMolecule],
) -> None:
"""
Assign partial charges to SMCs.
Assign partial charges to the OpenFF Molecules associated with all
the SmallMoleculeComponents in the transformation.

Parameters
----------
charge_settings : OpenFFPartialChargeSettings
Settings for controlling how the partial charges are assigned.
off_small_mols : dict[str, list[tuple[SmallMoleculeComponent, OFFMolecule]]]
Dictionary of dictionary of OpenFF Molecules to add, keyed by
state and SmallMoleculeComponent.
small_mols : dict[SmallMoleculeComponent, openff.toolkit.Molecule]
Dictionary of OpenFF Molecules to add, keyed by
their associated SmallMoleculeComponent.
"""
for smc, mol in chain(
off_small_mols["stateA"], off_small_mols["stateB"], off_small_mols["both"]
):
for smc, mol in small_mols.items():
charge_generation.assign_offmol_partial_charges(
offmol=mol,
overwrite=False,
Expand All @@ -139,6 +246,129 @@ def _assign_partial_charges(
nagl_model=charge_settings.nagl_model,
)

@staticmethod
def _get_system_generator(
    shared_basepath: pathlib.Path,
    settings: dict[str, SettingsBaseModel],
    solvent_comp: SolventComponent | None,
    openff_molecules: list[OFFMolecule] | None,
) -> SystemGenerator:
    """
    Get an OpenMM SystemGenerator.

    Parameters
    ----------
    shared_basepath : pathlib.Path
        Base path that the force field cache location from the output
        settings is joined onto, when one is set.
    settings : dict[str, SettingsBaseModel]
        A dictionary of protocol settings.
    solvent_comp : SolventComponent | None
        The solvent component of the system, if any.
    openff_molecules : list[openff.toolkit.Molecule] | None
        A list of openff molecules to generate templates for, if any.
        With ``None`` no templates are registered.

    Returns
    -------
    system_generator : openmmtools.SystemGenerator
        The SystemGenerator for the protocol.
    """
    ffcache = settings["output_settings"].forcefield_cache

    if ffcache is not None:
        ffcache = shared_basepath / ffcache

    # Block out oechem backend in system_generator calls to avoid
    # any issues with smiles roundtripping between rdkit and oechem
    # NOTE(review): the extent of this ``with`` block cannot be recovered
    # from the flattened diff; template registration is kept inside it
    # here - confirm against the file.
    with without_oechem_backend():
        system_generator = system_creation.get_system_generator(
            forcefield_settings=settings["forcefield_settings"],
            integrator_settings=settings["integrator_settings"],
            thermo_settings=settings["thermo_settings"],
            cache=ffcache,
            has_solvent=solvent_comp is not None,
        )

        # Handle openff Molecule templates
        # TODO: revisit this once the SystemGenerator update happens
        # and we start loading the whole protein into OpenFF Topologies
        if openff_molecules is not None:
            # First deduplicate isomorphic molecules, keeping the first
            # occurrence of each.
            unique_offmols: list[OFFMolecule] = []
            for mol in openff_molecules:
                if not any(
                    mol.is_isomorphic_with(umol) for umol in unique_offmols
                ):
                    unique_offmols.append(mol)

            # register all the templates
            system_generator.add_molecules(unique_offmols)
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is this part new or did I just not see it before? Just wondering out of curiosity why this is necessary.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This is new - it takes over this code:

for smc, mol in chain(
off_small_mols["stateA"], off_small_mols["stateB"], off_small_mols["both"]
):
system_generator.create_system(mol.to_topology().to_openmm(), molecules=[mol])

Essentially it does the exact same thing, but with less boiler plate / cost. It also avoids a potential case where you pass the same isomorphic molecule twice - technically that does nothing, but it's safer to just not do it.


return system_generator

def _get_omm_objects(
self,
stateA: ChemicalSystem,
stateB: ChemicalSystem,
mapping: LigandAtomMapping,
settings: dict[str, SettingsBaseModel],
protein_component: ProteinComponent | None,
solvent_component: SolventComponent | None,
small_mols: dict[SmallMoleculeComponent, OFFMolecule]
):
"""
Get OpenMM objects for both end states A and B.

Parameters
----------
stateA : ChemicalSystem
ChemicalSystem defining end state A.
stateB : ChmiecalSysstem
ChemicalSystem defining end state B.
mapping : LigandAtomMapping
The mapping for alchemical components between state A and B.
settings : dict[str, SettingsBaseModel]
Settings for the transformation.
protein_component : ProteinComponent | None
The common ProteinComponent between the end states, if there is is one.
solvent_component : SolventComponent | None
The common SolventComponent between the end states, if there is one.
small_mols : dict[SmallMoleculeCOmponent, openff.toolkit.Molecule]
The small molecules for both end states.

Returns
-------
stateA_system : openmm.System
OpenMM System for state A.
stateA_topology : openmm.app.Topology
OpenMM Topology for the state A System.
stateA_positions : openmm.unit.Quantity
Positions of partials for state A System.
stateB_system : openmm.System
OpenMM System for state B.
stateB_topology : openmm.app.Topology
OpenMM Topology for the state B System.
stateB_positions : openmm.unit.Quantity
Positions of partials for state B System.
system_mapping : dict[str, dict[int, int]]
Dictionary of mappings defining the correspondance between
the two state Systems.
"""
if self.verbose:
self.logger.info("Parameterizing systems")

# Get the system generator with all the templates registered
system_generator = self._get_system_generator(
shared_basepath=self.shared_basepath,
settings=settings,
solv_comp=solvent_component,
openff_molecules=list(small_mols.values())
)

....


def run(
self, *, dry=False, verbose=True, scratch_basepath=None, shared_basepath=None
) -> dict[str, Any]:
Expand Down Expand Up @@ -169,36 +399,26 @@ def run(
error
Exception if anything failed
"""
if verbose:
self.logger.info("Preparing the hybrid topology simulation")
if scratch_basepath is None:
scratch_basepath = pathlib.Path(".")
if shared_basepath is None:
# use cwd
shared_basepath = pathlib.Path(".")

# 0. General setup and settings dependency resolution step

# Extract relevant settings
protocol_settings: RelativeHybridTopologyProtocolSettings = self._inputs[
"protocol"
].settings
# Prepare paths & verbosity
self._prepare(verbose, scratch_basepath, shared_basepath)

# Get settings
settings = self._get_settings(self._inputs["protocol"].settings)

# Get components
stateA = self._inputs["stateA"]
stateB = self._inputs["stateB"]
mapping = self._inputs["ligandmapping"]

forcefield_settings: settings.OpenMMSystemGeneratorFFSettings = (
protocol_settings.forcefield_settings
alchem_comps, solvent_comp, protein_comp, small_mols = self._get_components(
stateA, stateB
)
thermo_settings: settings.ThermoSettings = protocol_settings.thermo_settings
alchem_settings: AlchemicalSettings = protocol_settings.alchemical_settings
lambda_settings: LambdaSettings = protocol_settings.lambda_settings
charge_settings: BasePartialChargeSettings = protocol_settings.partial_charge_settings
solvation_settings: OpenMMSolvationSettings = protocol_settings.solvation_settings
sampler_settings: MultiStateSimulationSettings = protocol_settings.simulation_settings
output_settings: MultiStateOutputSettings = protocol_settings.output_settings
integrator_settings: IntegratorSettings = protocol_settings.integrator_settings

# Assign partial charges now to avoid any discrepancies later
self._assign_partial_charges(charge_settings, small_mols)



# TODO: move these down, not needed until we get to the sampler
# TODO: Also validate various conversions?
# Convert various time based inputs to steps/iterations
steps_per_iteration = settings_validation.convert_steps_per_iteration(
Expand All @@ -217,34 +437,13 @@ def run(
mc_steps=steps_per_iteration,
)

solvent_comp, protein_comp, small_mols = system_validation.get_components(stateA)

# Get the change difference between the end states
# and check if the charge correction used is appropriate
charge_difference = mapping.get_alchemical_charge_difference()

# 1. Create stateA system
self.logger.info("Parameterizing molecules")

# a. create offmol dictionaries and assign partial charges
# workaround for conformer generation failures
# see openfe issue #576
# calculate partial charges manually if not already given
# convert to OpenFF here,
# and keep the molecule around to maintain the partial charges
off_small_mols: dict[str, list[tuple[SmallMoleculeComponent, OFFMolecule]]]
off_small_mols = {
"stateA": [(mapping.componentA, mapping.componentA.to_openff())],
"stateB": [(mapping.componentB, mapping.componentB.to_openff())],
"both": [
(m, m.to_openff())
for m in small_mols
if (m != mapping.componentA and m != mapping.componentB)
],
}

self._assign_partial_charges(charge_settings, off_small_mols)

# b. get a system generator
if output_settings.forcefield_cache is not None:
ffcache = shared_basepath / output_settings.forcefield_cache
Expand Down