Skip to content

Commit 0a66df4

Browse files
committed
add tableprint for latex
1 parent d92e55d commit 0a66df4

File tree

3 files changed

+64
-16
lines changed

3 files changed

+64
-16
lines changed

evaluation/print_table_4_latex.py

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
from evaluation.quantitative import mean_df, pd, mode
2+
3+
4+
def print_mark(key, name):
    """Return a LaTeX ``\\checkmark`` if *key* occurs in *name*, else ''.

    Used to mark which input modalities (image / loc / seg / clip) an
    experiment name encodes when building the results table.
    """
    # Raw string: the original '\checkmark' relied on '\c' not being a
    # recognized escape, which is a DeprecationWarning and a SyntaxWarning
    # on Python 3.12+.
    if key in name:
        return r'\checkmark'
    else:
        return ''
9+
10+
11+
if mode == 'end2end':
12+
table = pd.DataFrame(columns=['Img', 'Loc', 'Seg', 'Rep', 'Accuracy', 'F1', 'Precision', 'Recall', 'AUROC'])
13+
for experiment, row in mean_df.iterrows():
14+
table = pd.concat([table, pd.DataFrame({
15+
'Img': print_mark('image', experiment),
16+
'Loc': print_mark('loc', experiment),
17+
'Seg': print_mark('seg', experiment),
18+
'Rep': print_mark('clip', experiment),
19+
'Accuracy': row['Accuracy'],
20+
'F1': row['F1'],
21+
'Precision': row['Precision'],
22+
'Recall': row['Recall'],
23+
'AUROC': row['AUROC']
24+
}, index=[0]), ], ignore_index=True)
25+
# multiply floats by 100 to get percentage
26+
table.iloc[:, 4:] *= 100
27+
table = table.round(2)
28+
29+
print('\n\n')
30+
print(table.to_latex(index=False, float_format='%.2f'))
31+
else:
32+
table = pd.DataFrame(columns=['Encoder', 'Accuracy', 'F1', 'AUROC'])
33+
for experiment, row in mean_df.iterrows():
34+
table = pd.concat([table, pd.DataFrame({
35+
'Encoder': experiment,
36+
'Accuracy': row['Accuracy'],
37+
'F1': row['F1'],
38+
'AUROC': row['AUROC']
39+
}, index=[0]), ], ignore_index=True)
40+
# multiply floats by 100 to get percentage
41+
table.iloc[:, 1:] *= 100
42+
table = table.round(2)
43+
44+
print('\n\n')
45+
print(table.to_latex(index=False, float_format='%.2f'))

evaluation/quantitative.py

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -1,30 +1,31 @@
1+
from pathlib import Path
2+
3+
import pandas as pd
14
import torch
25
from torchmetrics import classification, MetricCollection
6+
37
from dataset.grazpedwri_dataset import GrazPedWriDataset
4-
import pandas as pd
5-
from pathlib import Path
6-
from evaluation.best_shot_accuracy import BestShotAccuracy
78

8-
mode = ['end2end', 'lin_eval'][0]
9+
mode = ['end2end', 'lin_eval'][1]
910

1011
metrics_kwargs = {'num_labels': GrazPedWriDataset.N_CLASSES, 'average': None}
1112
metrics = MetricCollection({
12-
"Acc": classification.MultilabelAccuracy(**metrics_kwargs),
13+
"Accuracy": classification.MultilabelAccuracy(**metrics_kwargs),
1314
"F1": classification.MultilabelF1Score(**metrics_kwargs),
1415
"Precision": classification.MultilabelPrecision(**metrics_kwargs),
1516
"Recall": classification.MultilabelRecall(**metrics_kwargs),
16-
"AUROC": classification.MultilabelAUROC(**metrics_kwargs),
17-
"BestShotAcc": BestShotAccuracy()
17+
"AUROC": classification.MultilabelAUROC(**metrics_kwargs)
1818
})
1919
pred_dir = Path('evaluation/predictions')
2020
gt = torch.load(pred_dir / 'ground_truth.pt')
2121

22-
mean_df = pd.DataFrame(columns=['Experiment', 'Acc', 'BestShotAcc', 'F1', 'Precision', 'Recall', 'AUROC'])
23-
experiment_df = pd.DataFrame(columns=['Experiment', 'Acc', 'F1', 'Precision', 'Recall', 'AUROC', 'AO_Class'])
22+
mean_df = pd.DataFrame(columns=['Experiment', 'Accuracy', 'F1', 'Precision', 'Recall', 'AUROC'])
23+
experiment_df = pd.DataFrame(columns=['Experiment', 'Accuracy', 'F1', 'Precision', 'Recall', 'AUROC', 'AO_Class'])
2424
for experiment in pred_dir.iterdir():
2525
is_line_eval = experiment.stem.startswith('LE')
2626
match_mode = (mode == 'lin_eval' and is_line_eval) or (mode == 'end2end' and not is_line_eval)
27-
if experiment.stem == 'ground_truth' or experiment.is_dir() or not match_mode:
27+
contains_mult_seg = 'mult_seg' in experiment.stem
28+
if experiment.stem == 'ground_truth' or experiment.is_dir() or not match_mode or contains_mult_seg:
2829
continue
2930

3031
pred = torch.load(experiment)
@@ -39,8 +40,7 @@
3940
performance = metrics(y_hat, y)
4041
mean_df = pd.concat([mean_df, pd.DataFrame({
4142
'Experiment': experiment.stem.rsplit('_', 1)[0],
42-
'Acc': performance['Acc'].mean().item(),
43-
"BestShotAcc": performance['BestShotAcc'].item(),
43+
'Accuracy': performance['Accuracy'].mean().item(),
4444
'F1': performance['F1'].mean().item(),
4545
'Precision': performance['Precision'].mean().item(),
4646
'Recall': performance['Recall'].mean().item(),
@@ -49,7 +49,7 @@
4949

5050
experiment_df = pd.concat([experiment_df, pd.DataFrame({
5151
'Experiment': experiment.stem.rsplit('_', 1)[0],
52-
'Acc': performance['Acc'].tolist(),
52+
'Accuracy': performance['Accuracy'].tolist(),
5353
'F1': performance['F1'].tolist(),
5454
'Precision': performance['Precision'].tolist(),
5555
'Recall': performance['Recall'].tolist(),

evaluation/statistical_significance_test.py

Lines changed: 6 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
warnings.filterwarnings("ignore")
1111

1212
candidate = "image_frac_loc_bin_seg_clip"
13+
test_direction = ['greater', 'two-sided'][0]
1314
significance_level = 0.05
1415

1516
pred_dir = Path('evaluation/predictions')
@@ -22,11 +23,12 @@
2223
filelist = list(gt.keys())
2324
gt = torch.stack([gt[file_stem] for file_stem in filelist]).int()
2425

25-
candidate_path = [experiment for experiment in available_experiments if experiment.stem.startswith(candidate)][0]
26+
candidate_path = [experiment for experiment in available_experiments if experiment.name.rsplit('_', 1)[0] == candidate][0]
2627
y_pred_canditate = torch.load(candidate_path)
2728
y_pred_canditate = torch.stack([y_pred_canditate[file_stem] for file_stem in filelist])
2829
auroc_canditate = metric(y_pred_canditate, gt)
29-
print(f'Candidate: {candidate_path.stem.rsplit('_', 1)[0]} with AUROC: {auroc_canditate.mean().item()}')
30+
print(f'Candidate: {candidate_path.name.rsplit('_', 1)[0]} with AUROC: {auroc_canditate.mean().item()}')
31+
print(f'Test direction: {test_direction}')
3032

3133
df = pd.DataFrame(columns=['Challenger', 'AUROC', 'statistic', 'p-value', f'significant at {significance_level}'])
3234
for challenger in available_experiments:
@@ -47,5 +49,6 @@
4749
}, index=[0]), ], ignore_index=True)
4850

4951
df.set_index('Challenger', inplace=True)
50-
df.sort_values('AUROC', ascending=False, inplace=True)
52+
df.sort_index(inplace=True)
53+
df.sort_values('p-value', ascending=False, inplace=True)
5154
print(df.to_string())

0 commit comments

Comments
 (0)