|
4 | 4 | from urllib.parse import urlparse |
5 | 5 | import mlflow |
6 | 6 | from filelock import FileLock |
7 | | -from mlflow.exceptions import MlflowException |
| 7 | +from mlflow.exceptions import MlflowException, RESOURCE_ALREADY_EXISTS, ErrorCode |
8 | 8 | from mlflow.entities import ViewType |
9 | 9 | import os, logging |
10 | 10 | from pathlib import Path |
|
15 | 15 | from ..config import C |
16 | 16 | from .recorder import Recorder |
17 | 17 | from ..log import get_module_logger |
| 18 | +from ..utils.exceptions import ExpAlreadyExistError |
18 | 19 |
|
19 | 20 | logger = get_module_logger("workflow", logging.INFO) |
20 | 21 |
|
@@ -94,6 +95,10 @@ def create_exp(self, experiment_name: Optional[Text] = None): |
94 | 95 | Returns |
95 | 96 | ------- |
96 | 97 | An experiment object. |
| 98 | +
|
| 99 | + Raises |
| 100 | + ------ |
| 101 | + ExpAlreadyExistError |
97 | 102 | """ |
98 | 103 | raise NotImplementedError(f"Please implement the `create_exp` method.") |
99 | 104 |
|
@@ -200,7 +205,14 @@ def _get_or_create_exp(self, experiment_id=None, experiment_name=None) -> (objec |
200 | 205 | if pr.scheme == "file": |
201 | 206 | with FileLock(os.path.join(pr.netloc, pr.path, "filelock")) as f: |
202 | 207 | return self.create_exp(experiment_name), True |
203 | | - return self.create_exp(experiment_name), True |
| 208 | + # NOTE: other schemes (e.g. http) are not guarded by the file lock, so if creation reports that the experiment already exists we fetch the existing one instead |
| 209 | + try: |
| 210 | + return self.create_exp(experiment_name), True |
| 211 | + except ExpAlreadyExistError: |
| 212 | + return ( |
| 213 | + self._get_exp(experiment_id=experiment_id, experiment_name=experiment_name), |
| 214 | + False, |
| 215 | + ) |
204 | 216 |
|
205 | 217 | def _get_exp(self, experiment_id=None, experiment_name=None) -> Experiment: |
206 | 218 | """ |
@@ -345,10 +357,15 @@ def end_exp(self, recorder_status: Text = Recorder.STATUS_S): |
def create_exp(self, experiment_name: Optional[Text] = None):
    """
    Create a new experiment through the MLflow client.

    Parameters
    ----------
    experiment_name : str
        the name of the experiment to create; must not be None.

    Returns
    -------
    An MLflowExperiment object wrapping the newly created experiment.

    Raises
    ------
    ExpAlreadyExistError
        when the MLflow client reports RESOURCE_ALREADY_EXISTS.
    MlflowException
        any other error from the MLflow client is re-raised unchanged.
    """
    assert experiment_name is not None
    # init experiment
    try:
        experiment_id = self.client.create_experiment(experiment_name)
    except MlflowException as e:
        if e.error_code == ErrorCode.Name(RESOURCE_ALREADY_EXISTS):
            # Translate the backend-specific error into the framework's own
            # exception; chain explicitly so the MLflow error stays the cause.
            raise ExpAlreadyExistError() from e
        # Bare `raise` re-raises the active exception with its original traceback.
        raise

    experiment = MLflowExperiment(experiment_id, experiment_name, self.uri)
    experiment._default_name = self._default_exp_name
    return experiment
353 | 370 |
|
354 | 371 | def _get_exp(self, experiment_id=None, experiment_name=None): |
|
0 commit comments