|
import ast

from vlmeval.smp import *

| 3 | + |
def OCRBench_eval(eval_file):
    """Score OCRBench predictions by substring matching against reference answers.

    Args:
        eval_file (str): Path to an ``.xlsx`` result file readable via ``load``;
            each row must provide ``prediction``, ``answer`` (a list literal
            serialized as a string) and ``category`` columns.

    Returns:
        None. Aggregated scores are written to ``<eval_file>`` with ``.xlsx``
        replaced by ``_score.json``, and logged via the 'Evaluation' logger.
    """
    OCRBench_score = {
        'Regular Text Recognition': 0,
        'Irregular Text Recognition': 0,
        'Artistic Text Recognition': 0,
        'Handwriting Recognition': 0,
        'Digit String Recognition': 0,
        'Non-Semantic Text Recognition': 0,
        'Scene Text-centric VQA': 0,
        'Doc-oriented VQA': 0,
        'Key Information Extraction': 0,
        'Handwritten Mathematical Expression Recognition': 0,
    }

    logger = get_logger('Evaluation')

    data = load(eval_file)
    lines = [data.iloc[i] for i in range(len(data))]
    for line in tqdm(lines):
        predict = str(line['prediction'])
        raw_answer = line['answer']
        # Parse the serialized answer list safely: ast.literal_eval accepts only
        # Python literals, unlike eval() which would execute arbitrary code
        # embedded in the result file.
        try:
            answers = ast.literal_eval(raw_answer)
        except (ValueError, SyntaxError):
            answers = [raw_answer]  # fall back: treat as a single plain-string answer
        if not isinstance(answers, (list, tuple)):
            answers = [answers]
        category = line['category']
        if category == 'Handwritten Mathematical Expression Recognition':
            # LaTeX-style answers: compare with all whitespace removed,
            # case-sensitive. Normalization of the prediction is loop-invariant,
            # so hoist it out of the answer loop.
            norm_predict = predict.strip().replace('\n', ' ').replace(' ', '')
            for answer in answers:
                answer = answer.strip().replace('\n', ' ').replace(' ', '')
                if answer in norm_predict:
                    OCRBench_score[category] += 1
                    break
        else:
            # All other categories: case-insensitive substring match with
            # newlines flattened to spaces.
            norm_predict = predict.lower().strip().replace('\n', ' ')
            for answer in answers:
                answer = answer.lower().strip().replace('\n', ' ')
                if answer in norm_predict:
                    OCRBench_score[category] += 1
                    break

    final_score_dict = {}
    # The six recognition sub-categories roll up into one 'Text Recognition' score.
    final_score_dict['Text Recognition'] = (
        OCRBench_score['Regular Text Recognition'] + OCRBench_score['Irregular Text Recognition']
        + OCRBench_score['Artistic Text Recognition'] + OCRBench_score['Handwriting Recognition']
        + OCRBench_score['Digit String Recognition'] + OCRBench_score['Non-Semantic Text Recognition']
    )
    final_score_dict['Scene Text-centric VQA'] = OCRBench_score['Scene Text-centric VQA']
    final_score_dict['Doc-oriented VQA'] = OCRBench_score['Doc-oriented VQA']
    final_score_dict['Key Information Extraction'] = OCRBench_score['Key Information Extraction']
    final_score_dict['Handwritten Mathematical Expression Recognition'] = \
        OCRBench_score['Handwritten Mathematical Expression Recognition']
    final_score_dict['Final Score'] = (
        final_score_dict['Text Recognition'] + final_score_dict['Scene Text-centric VQA']
        + final_score_dict['Doc-oriented VQA'] + final_score_dict['Key Information Extraction']
        + final_score_dict['Handwritten Mathematical Expression Recognition']
    )
    # NOTE(review): /10 presumably normalizes a 1000-sample benchmark to a
    # 0-100 scale — confirm against the dataset size before changing.
    final_score_dict['Final Score Norm'] = float(final_score_dict['Final Score']) / 10
    score_pth = eval_file.replace('.xlsx', '_score.json')
    dump(final_score_dict, score_pth)
    logger.info(f'OCRBench_eval successfully finished evaluating {eval_file}, results saved in {score_pth}')
    logger.info('Score: ')
    for key, value in final_score_dict.items():
        logger.info('{}:{}'.format(key, value))
0 commit comments