|
184 | 184 | "outputs": [], |
185 | 185 | "source": [ |
186 | 186 | "# Read activity data for EGFR into a pandas dataframe named df\n", |
| 187 | + "egfr_chembl25_link = 'https://github.com/volkamerlab/ai_in_medicine/raw/master/data/egfr_chembl25.csv'\n", |
187 | 188 | "#################### <-- insert code below\n", |
188 | | - "df = pd.read_csv('https://github.com/volkamerlab/ai_in_medicine/raw/update-2021.02/data/egfr_chembl25.csv')\n", |
189 | | - "display(df.head())\n", |
190 | | - "df.info()\n", |
| 189 | + "\n", |
191 | 190 | "#################### <-- insert code above" |
192 | 191 | ] |
193 | 192 | }, |
|
257 | 256 | "source": [ |
258 | 257 | "# Write the SMILES for acetylsalicylic acid (aspirin)\n", |
259 | 258 | "####################\n", |
260 | | - "aspirin = Chem.MolFromSmiles('C1=C-C(C(O)=O)=C(OC(=O)C)-C=C1')\n", |
261 | | - "aspirin\n", |
| 259 | + "\n", |
262 | 260 | "####################" |
263 | 261 | ] |
264 | 262 | }, |
|
379 | 377 | "source": [ |
380 | 378 | "# Mark every molecule with an IC50 < 500 nM as active\n", |
381 | 379 | "####################\n", |
382 | | - "df['active'] = np.zeros(len(df))\n", |
383 | | - "df.loc[df[df['IC50[nM]'] < 500].index, 'active'] = 1.0\n", |
384 | | - "display(df.head())\n", |
| 380 | + "\n", |
385 | 381 | "####################" |
386 | 382 | ] |
387 | 383 | }, |
|
421 | 417 | "x, y = df['maccs'].to_list(), df['active'].to_list()\n", |
422 | 418 | "# Split the features and labels into training and test sets\n", |
423 | 419 | "####################\n", |
424 | | - "x_train, x_test, y_train, y_test = train_test_split(x, y, test_size=0.2)\n", |
| 420 | + "\n", |
425 | 421 | "####################" |
426 | 422 | ] |
427 | 423 | }, |
|
442 | 438 | "source": [ |
443 | 439 | "# train model\n", |
444 | 440 | "####################\n", |
445 | | - "svc = SVC(probability=True)\n", |
446 | | - "svc.fit(x_train, y_train)\n", |
| 441 | + "\n", |
447 | 442 | "####################" |
448 | 443 | ] |
449 | 444 | }, |
|
465 | 460 | "source": [ |
466 | 461 | "# predict the activity of the test set\n", |
467 | 462 | "####################\n", |
468 | | - "y_test_pred = svc.predict(x_test)\n", |
| 463 | + "\n", |
469 | 464 | "####################" |
470 | 465 | ] |
471 | 466 | }, |
|
477 | 472 | "source": [ |
478 | 473 | "# calculate AUC\n", |
479 | 474 | "####################\n", |
480 | | - "svc_roc_auc = roc_auc_score(y_test, y_test_pred)\n", |
481 | | - "print('AUC:', svc_roc_auc)\n", |
| 475 | + "\n", |
482 | 476 | "####################" |
483 | 477 | ] |
484 | 478 | }, |
|
490 | 484 | "source": [ |
491 | 485 | "# plot the ROC curve\n", |
492 | 486 | "####################\n", |
493 | | - "fpr, tpr, thresholds = roc_curve(y_test, svc.predict_proba(x_test)[:,1])\n", |
494 | | - "plt.plot(fpr, tpr, label=f'SVC - AUC={round(svc_roc_auc,2)}')\n", |
495 | | - "plt.xlim([-0.05, 1.05])\n", |
496 | | - "plt.ylim([-0.05, 1.05])\n", |
497 | | - "plt.plot([0, 1], [0, 1], linestyle='--', label='Random', lw=2, color=\"black\") # Random curve\n", |
498 | | - "plt.xlabel('False positive rate', size=12)\n", |
499 | | - "plt.ylabel('True positive rate', size=12)\n", |
500 | | - "plt.tick_params(labelsize=12)\n", |
501 | | - "plt.legend(fontsize=12)\n", |
502 | | - "plt.show()\n", |
| 487 | + "\n", |
503 | 488 | "####################" |
504 | 489 | ] |
505 | 490 | }, |
|
519 | 504 | "outputs": [], |
520 | 505 | "source": [ |
521 | 506 | "# Load data and assign MACCS keys\n", |
| 507 | + "egfr_candidates_link = 'https://github.com/volkamerlab/ai_in_medicine/raw/master/data/egfr_candidates.csv'\n", |
522 | 508 | "####################\n", |
523 | | - "df2 = pd.read_csv('https://github.com/volkamerlab/ai_in_medicine/raw/update-2021.02/data/egfr_candidates.csv')\n", |
524 | | - "add_mols_and_maccs(df2)\n", |
525 | | - "display(df2)\n", |
| 509 | + "\n", |
526 | 510 | "####################" |
527 | 511 | ] |
528 | 512 | }, |
|
534 | 518 | "source": [ |
535 | 519 | "# predict the activity\n", |
536 | 520 | "####################\n", |
537 | | - "y_pred_svc = svc.predict(df2['maccs'].tolist())\n", |
538 | | - "display(y_pred_svc)\n", |
| 521 | + "\n", |
539 | 522 | "####################" |
540 | 523 | ] |
541 | 524 | }, |
|
563 | 546 | "name": "python", |
564 | 547 | "nbconvert_exporter": "python", |
565 | 548 | "pygments_lexer": "ipython3", |
566 | | - "version": "3.8.3" |
| 549 | + "version": "3.9.1" |
567 | 550 | } |
568 | 551 | }, |
569 | 552 | "nbformat": 4, |
|
0 commit comments