Add files via upload

yangyang-PKU · web-flow · commit b318d63d2040 · 2023-04-26T11:58:59.000+08:00
diff --git a/Code/ED/const.py b/Code/ED/const.py
@@ -0,0 +1,76 @@
+# The 33 event types, written as "<Category>.<Subtype>".
+# Order matters: get_vocab() in score_ED_E+.py derives label ids from the
+# position of each entry, so append new types instead of re-sorting the list.
+EVENT_TYPE = [
+    # Movement
+    "Movement.Transport",
+    # Personnel
+    "Personnel.Elect",
+    "Personnel.Start-Position",
+    "Personnel.Nominate",
+    "Personnel.End-Position",
+    # Conflict
+    "Conflict.Attack",
+    "Conflict.Demonstrate",
+    # Contact
+    "Contact.Phone-Write",
+    "Contact.Meet",
+    # Transaction
+    "Transaction.Transfer-Money",
+    "Transaction.Transfer-Ownership",
+    # Business
+    "Business.Start-Org",
+    "Business.Merge-Org",
+    "Business.Declare-Bankruptcy",
+    "Business.End-Org",
+    # Life
+    "Life.Be-Born",
+    "Life.Injure",
+    "Life.Die",
+    "Life.Marry",
+    "Life.Divorce",
+    # Justice
+    "Justice.Sue",
+    "Justice.Arrest-Jail",
+    "Justice.Execute",
+    "Justice.Charge-Indict",
+    "Justice.Convict",
+    "Justice.Trial-Hearing",
+    "Justice.Sentence",
+    "Justice.Release-Parole",
+    "Justice.Fine",
+    "Justice.Pardon",
+    "Justice.Appeal",
+    "Justice.Extradite",
+    "Justice.Acquit",
+]
+
+# Entity type tags; the value sets of All_Valid_EntTypes are built from these.
+ENTITY = [
+    'PER', 'ORG', 'GPE', 'LOC', 'FAC', 'VEH', 'WEA',
+]
+
+# Argument role names; they appear as the second element of the
+# (event_type, role) keys of All_Valid_EntTypes.
+ROLE = [
+    'Org', 'Place', 'Instrument', 'Vehicle', 'Attacker', 'Prosecutor',
+    'Agent', 'Victim', 'Origin', 'Target', 'Giver', 'Seller',
+    'Defendant', 'Recipient', 'Entity', 'Plaintiff', 'Person', 'Artifact',
+    'Destination', 'Adjudicator', 'Beneficiary', 'Buyer',
+]
+
+
+# Maps an (event_type, role) pair to a set of entity type tags; judging by
+# the name, these are the entity types considered valid for that role.
+# One entry per line, in the original insertion order.
+All_Valid_EntTypes = {
+    ('Movement.Transport', 'Vehicle'): {'VEH'},
+    ('Movement.Transport', 'Artifact'): {'VEH', 'PER', 'WEA'},
+    ('Movement.Transport', 'Destination'): {'GPE', 'FAC', 'LOC'},
+    ('Personnel.Elect', 'Person'): {'PER'},
+    ('Movement.Transport', 'Agent'): {'ORG', 'GPE', 'PER'},
+    ('Personnel.Start-Position', 'Person'): {'PER'},
+    ('Personnel.Start-Position', 'Entity'): {'ORG', 'GPE'},
+    ('Personnel.Nominate', 'Person'): {'PER'},
+    ('Conflict.Attack', 'Place'): {'GPE', 'FAC', 'LOC'},
+    ('Personnel.End-Position', 'Entity'): {'ORG', 'GPE'},
+    ('Personnel.End-Position', 'Person'): {'PER'},
+    ('Contact.Meet', 'Entity'): {'ORG', 'GPE', 'PER'},
+    ('Contact.Meet', 'Place'): {'GPE', 'FAC', 'LOC'},
+    ('Life.Marry', 'Person'): {'PER'},
+    ('Personnel.Elect', 'Entity'): {'ORG', 'GPE', 'PER'},
+    ('Conflict.Attack', 'Target'): {'ORG', 'PER', 'VEH', 'FAC', 'LOC', 'WEA'},
+    ('Conflict.Attack', 'Attacker'): {'ORG', 'GPE', 'PER'},
+    ('Transaction.Transfer-Money', 'Giver'): {'ORG', 'GPE', 'PER'},
+    ('Transaction.Transfer-Money', 'Recipient'): {'ORG', 'GPE', 'PER'},
+    ('Conflict.Demonstrate', 'Entity'): {'ORG', 'PER'},
+    ('Conflict.Demonstrate', 'Place'): {'GPE', 'FAC', 'LOC'},
+    ('Business.End-Org', 'Place'): {'GPE', 'FAC'},
+    ('Justice.Sue', 'Plaintiff'): {'ORG', 'PER'},
+    ('Life.Injure', 'Victim'): {'PER'},
+    ('Life.Injure', 'Agent'): {'GPE', 'PER'},
+    ('Life.Die', 'Victim'): {'PER'},
+    ('Life.Die', 'Agent'): {'ORG', 'GPE', 'PER'},
+    ('Personnel.Start-Position', 'Place'): {'GPE', 'FAC'},
+    ('Life.Divorce', 'Place'): {'GPE', 'FAC', 'LOC'},
+    ('Life.Die', 'Place'): {'GPE', 'FAC', 'LOC'},
+    ('Justice.Arrest-Jail', 'Person'): {'PER'},
+    ('Justice.Arrest-Jail', 'Agent'): {'ORG', 'GPE', 'PER'},
+    ('Personnel.End-Position', 'Place'): {'GPE', 'FAC'},
+    ('Contact.Phone-Write', 'Entity'): {'ORG', 'PER'},
+    ('Life.Injure', 'Place'): {'GPE', 'FAC', 'LOC'},
+    ('Transaction.Transfer-Ownership', 'Buyer'): {'ORG', 'GPE', 'PER'},
+    ('Transaction.Transfer-Ownership', 'Artifact'): {'ORG', 'VEH', 'FAC', 'WEA'},
+    ('Transaction.Transfer-Ownership', 'Seller'): {'ORG', 'GPE', 'PER'},
+    ('Conflict.Attack', 'Instrument'): {'VEH', 'WEA'},
+    ('Life.Die', 'Instrument'): {'VEH', 'WEA'},
+    ('Justice.Arrest-Jail', 'Place'): {'GPE', 'FAC'},
+    ('Movement.Transport', 'Origin'): {'GPE', 'FAC', 'LOC'},
+    ('Business.End-Org', 'Org'): {'ORG'},
+    ('Life.Injure', 'Instrument'): {'VEH', 'WEA'},
+    ('Transaction.Transfer-Ownership', 'Place'): {'GPE', 'FAC', 'LOC'},
+    ('Transaction.Transfer-Ownership', 'Beneficiary'): {'GPE', 'PER'},
+    ('Justice.Execute', 'Place'): {'GPE', 'FAC'},
+    ('Justice.Execute', 'Agent'): {'ORG', 'GPE', 'PER'},
+    ('Conflict.Attack', 'Victim'): {'PER'},
+    ('Contact.Phone-Write', 'Place'): {'GPE', 'FAC', 'LOC'},
+    ('Justice.Trial-Hearing', 'Defendant'): {'ORG', 'PER'},
+    ('Justice.Execute', 'Person'): {'PER'},
+    ('Movement.Transport', 'Place'): {'GPE'},
+    ('Personnel.Elect', 'Place'): {'GPE', 'LOC'},
+    ('Life.Be-Born', 'Place'): {'GPE', 'FAC', 'LOC'},
+    ('Justice.Charge-Indict', 'Adjudicator'): {'ORG', 'PER'},
+    ('Business.Start-Org', 'Org'): {'ORG'},
+    ('Business.Start-Org', 'Place'): {'GPE', 'FAC'},
+    ('Justice.Convict', 'Defendant'): {'ORG', 'PER'},
+    ('Justice.Convict', 'Adjudicator'): {'ORG'},
+    ('Justice.Sentence', 'Defendant'): {'ORG', 'PER'},
+    ('Justice.Sentence', 'Adjudicator'): {'ORG', 'GPE', 'PER'},
+    ('Business.Declare-Bankruptcy', 'Org'): {'ORG', 'PER'},
+    ('Justice.Release-Parole', 'Entity'): {'ORG', 'GPE', 'PER'},
+    ('Justice.Release-Parole', 'Person'): {'PER'},
+    ('Justice.Charge-Indict', 'Defendant'): {'ORG', 'PER'},
+    ('Justice.Trial-Hearing', 'Place'): {'GPE', 'FAC', 'LOC'},
+    ('Justice.Trial-Hearing', 'Adjudicator'): {'ORG', 'PER'},
+    ('Justice.Trial-Hearing', 'Prosecutor'): {'ORG', 'PER'},
+    ('Justice.Charge-Indict', 'Prosecutor'): {'ORG', 'GPE', 'PER'},
+    ('Justice.Fine', 'Entity'): {'ORG', 'GPE', 'PER'},
+    ('Business.Start-Org', 'Agent'): {'ORG', 'GPE', 'PER'},
+    ('Justice.Pardon', 'Adjudicator'): {'ORG', 'PER'},
+    ('Justice.Charge-Indict', 'Place'): {'GPE', 'FAC', 'LOC'},
+    ('Justice.Appeal', 'Adjudicator'): {'ORG', 'PER'},
+    ('Justice.Appeal', 'Plaintiff'): {'ORG', 'GPE', 'PER'},
+    ('Justice.Sentence', 'Place'): {'GPE', 'FAC'},
+    ('Life.Die', 'Person'): {'PER'},
+    ('Life.Be-Born', 'Person'): {'PER'},
+    ('Justice.Release-Parole', 'Place'): {'GPE', 'FAC'},
+    ('Justice.Sue', 'Defendant'): {'ORG', 'GPE', 'PER'},
+    ('Transaction.Transfer-Money', 'Beneficiary'): {'ORG', 'GPE', 'PER'},
+    ('Justice.Convict', 'Place'): {'GPE'},
+    ('Justice.Extradite', 'Origin'): {'GPE', 'FAC'},
+    ('Justice.Extradite', 'Destination'): {'GPE'},
+    ('Justice.Appeal', 'Place'): {'GPE', 'FAC'},
+    ('Business.Declare-Bankruptcy', 'Place'): {'GPE'},
+    ('Justice.Fine', 'Adjudicator'): {'PER'},
+    ('Life.Marry', 'Place'): {'GPE', 'FAC'},
+    ('Life.Divorce', 'Person'): {'PER'},
+    ('Personnel.Nominate', 'Agent'): {'ORG', 'GPE', 'PER'},
+    ('Business.Merge-Org', 'Org'): {'ORG'},
+    ('Justice.Acquit', 'Defendant'): {'PER'},
+    ('Justice.Sue', 'Adjudicator'): {'ORG', 'PER'},
+    ('Justice.Sue', 'Place'): {'GPE', 'LOC'},
+    ('Justice.Fine', 'Place'): {'GPE', 'FAC'},
+    ('Justice.Pardon', 'Place'): {'GPE'},
+    ('Justice.Pardon', 'Defendant'): {'PER'},
+    ('Justice.Acquit', 'Adjudicator'): {'PER'},
+    ('Transaction.Transfer-Money', 'Place'): {'GPE', 'LOC'},
+    ('Justice.Extradite', 'Agent'): {'ORG'},
+}
diff --git a/Code/ED/score_ED_E+.py b/Code/ED/score_ED_E+.py
@@ -0,0 +1,188 @@
+import os
+import json
+from const import EVENT_TYPE
+import torch
+from torchmetrics.classification import MulticlassCalibrationError
+
+
+def safe_div(num, denom):
+    """Divide ``num`` by ``denom``, returning 0 when ``denom`` is not positive."""
+    return num / denom if denom > 0 else 0
+
+def compute_f1(predicted, gold, matched):
+    """Return ``(precision, recall, f1)`` computed from raw counts.
+
+    ``predicted`` and ``gold`` are the totals of predicted and gold items and
+    ``matched`` is the number of predictions counted as correct.  Every ratio
+    goes through ``safe_div``, so a zero denominator yields 0 instead of an
+    exception.
+    """
+    p = safe_div(matched, predicted)
+    r = safe_div(matched, gold)
+    return p, r, safe_div(2 * p * r, p + r)
+
+
+def evaluate(preds, gold):
+    """Score predicted triggers against the gold triggers and print the results.
+
+    Three matching schemes are reported, all normalised by the same predicted
+    and gold totals:
+
+    * Trigger Identification: a gold trigger has the same (start, end) span.
+    * Trigger Classification: as above, and the event type agrees as well.
+    * Trigger Word Cls: a gold trigger of the same example has the same
+      trigger text and event type, regardless of its position.
+
+    The word-level match also drives the mean confidence of correct and
+    incorrect predictions, the "Auto Rate" (share of word-level matches whose
+    ``if_reasonable`` flag is truthy) and the Expected Calibration Error.
+
+    Args:
+        preds: dict mapping example id to a list of
+            ``[start, end, event_type, trigger_word, confidence, if_reasonable]``
+            records.  ``confidence`` is divided by 100 before it is used as a
+            probability.  Mutated in place: every list is de-duplicated and its
+            records are turned into tuples.
+        gold: dict mapping example id to a list of
+            ``[start, end, event_type, trigger_word]`` records.  Mutated in
+            place: records are turned into tuples.
+
+    Returns:
+        None.  Every metric is printed to stdout.
+    """
+    assert len(preds) == len(gold)
+
+    for example_id in preds:
+        # Tuples are hashable, which lets set() drop duplicated predictions.
+        preds[example_id] = list(set(tuple(i) for i in preds[example_id]))
+        gold[example_id] = [tuple(i) for i in gold[example_id]]
+
+    pred_tri_num = sum(len(preds[example_id]) for example_id in preds)
+    gold_tri_num = sum(len(gold[example_id]) for example_id in preds)
+    match_idn_num, match_cls_num, match_word_num = 0, 0, 0
+    correct_confidence = 0
+    incorrect_confidence = 0
+    if_reasonable_num = 0
+
+    calibrate_record = []
+    invalid_event_type = 0
+    for example_id in preds:
+        gold_tris = gold[example_id]
+        for pred_tri in preds[example_id]:
+            start, end, event_type, trigger_word, confidence, if_reasonable = pred_tri
+
+            # Span-based matching.  Keep this ahead of the calibration filter
+            # further down: that filter's `continue` must not hide a prediction
+            # from the identification / classification counts.
+            match_idn = [item for item in gold_tris if item[0] == start and item[1] == end]
+            if match_idn:
+                match_idn_num += 1
+                if any(item[2] == event_type for item in match_idn):
+                    match_cls_num += 1
+
+            # Word-based matching ignores where the trigger is located.
+            match_word = any(item[3] == trigger_word and item[2] == event_type for item in gold_tris)
+            if match_word:
+                match_word_num += 1
+                correct_confidence += confidence
+                if if_reasonable:
+                    if_reasonable_num += 1
+                gold_label = event_type
+            else:
+                incorrect_confidence += confidence
+                gold_label = 'None'
+
+            # Calibration needs a predicted label that has an id and a non-zero
+            # confidence; anything else is only counted as invalid.
+            if event_type not in LABEL2ID or confidence == 0:
+                invalid_event_type += 1
+                continue
+            calibrate_record.append([LABEL2ID[gold_label], LABEL2ID[event_type], confidence / 100])
+
+    print(f"gold_tri_num: {gold_tri_num}, pred_tri_num: {pred_tri_num}, match_idn_num: {match_idn_num}, match_cls_num: {match_cls_num}, match_word_num: {match_word_num}")
+
+    tri_id_prec, tri_id_rec, tri_id_f = compute_f1(pred_tri_num, gold_tri_num, match_idn_num)
+    tri_cls_prec, tri_cls_rec, tri_cls_f = compute_f1(pred_tri_num, gold_tri_num, match_cls_num)
+    tri_word_prec, tri_word_rec, tri_word_f = compute_f1(pred_tri_num, gold_tri_num, match_word_num)
+    print('Trigger Identification: P: {:.2f}, R: {:.2f}, F: {:.2f}'.format(tri_id_prec * 100.0, tri_id_rec * 100.0, tri_id_f * 100.0))
+    print('Trigger Classification: P: {:.2f}, R: {:.2f}, F: {:.2f}'.format(tri_cls_prec * 100.0, tri_cls_rec * 100.0, tri_cls_f * 100.0))
+    print('Trigger Word Cls: P: {:.2f}, R: {:.2f}, F: {:.2f}'.format(tri_word_prec * 100.0, tri_word_rec * 100.0, tri_word_f * 100.0))
+    # safe_div keeps the report alive when no prediction (or every prediction)
+    # is a word-level match, which would otherwise divide by zero.
+    print('(Trigger Word Cls) Correct Mean Confidence: {:.2f}, Incorrect Mean Confidence: {:.2f}'.format(
+        safe_div(correct_confidence, match_word_num),
+        safe_div(incorrect_confidence, pred_tri_num - match_word_num)))
+    print(f' Auto Rate: {safe_div(if_reasonable_num, match_word_num)}')
+
+    # Compute Expected Calibration Error (ECE)
+    assert len(calibrate_record) == (pred_tri_num - invalid_event_type)
+    print(invalid_event_type, len(calibrate_record))
+    if not calibrate_record:
+        # Nothing to unpack or bin; report it instead of failing in zip().
+        print('Expected Calibration Error: n/a (no prediction with a known event type and non-zero confidence)')
+        return
+    label_idx, pred_idx, prob = zip(*calibrate_record)
+    labels = torch.tensor(label_idx)
+    # One row per prediction: the stated confidence sits on the predicted
+    # class and every other class stays at zero.
+    probs = torch.zeros(len(calibrate_record), len(LABEL2ID), dtype=torch.float32)
+    probs[torch.arange(len(calibrate_record)), torch.tensor(pred_idx)] = torch.tensor(prob, dtype=torch.float32)
+    # Class count follows the vocabulary so the two cannot drift apart.
+    metric = MulticlassCalibrationError(num_classes=len(LABEL2ID), n_bins=50, norm='l1')
+    result = metric(probs, labels)
+    print('Expected Calibration Error: {:.5f}'.format(result))
+
+
+def filter_invalid_answer(preds):
+    """Drop malformed prediction records in place and shift ``end`` by one.
+
+    A record ``[start, end, event_type, trigger_word, ...]`` is discarded when
+    ``event_type`` or ``trigger_word`` is not a string or is one of the
+    placeholder answers listed in ``filter_words``, or when ``start`` or
+    ``end`` is not a non-negative int.  Every surviving record then has its
+    ``end`` (index 1) incremented by one.
+    NOTE(review): the increment presumably turns an inclusive end index into
+    the convention used by the gold data -- confirm against the data files.
+
+    Args:
+        preds: dict mapping example id to a list of mutable prediction
+            records.  The lists and the surviving records are modified in
+            place.
+
+    Returns:
+        The number of records that were removed.
+    """
+    filter_words = frozenset([
+        'unknown', 'Unknown', 'unspecified', 'not specified', 'not mentioned',
+        'None', 'none', 'not applicable', 'N/A',
+    ])
+
+    def if_invalid(record):
+        # Type checks come first so the later comparisons are always defined.
+        if not isinstance(record[2], str) or not isinstance(record[3], str):
+            return True
+        if not isinstance(record[0], int) or not isinstance(record[1], int):
+            return True
+        if record[0] < 0 or record[1] < 0:
+            return True
+        return record[2] in filter_words or record[3] in filter_words
+
+    count = 0
+    for records in preds.values():
+        # Filter by identity rather than list.remove(): remove() deletes the
+        # first record that compares equal, so an invalid [1.0, ...] record
+        # could evict a valid [1, ...] one and stay in the list itself.
+        kept = [record for record in records if not if_invalid(record)]
+        count += len(records) - len(kept)
+        # Slice assignment keeps the caller's list object.
+        records[:] = kept
+        for record in kept:
+            record[1] += 1      # end + 1
+    return count
+        
+
+def read_gold_example(path):
+    """Load the gold triggers from a JSON-lines file.
+
+    Every non-blank line must be a JSON object with an ``id`` and an
+    ``events`` list; each event provides ``event_type`` and a ``trigger``
+    object holding ``start``, ``end`` and ``text``.
+
+    Args:
+        path: location of the gold file.  It is read as UTF-8, matching how
+            ``main`` opens the prediction files.
+
+    Returns:
+        dict mapping each example id to a list of
+        ``[start, end, event_type, trigger_word]`` lists, where ``:`` in the
+        event type has been replaced by ``.``.
+    """
+    gold = {}
+    with open(path, encoding='utf-8') as f:
+        for raw_line in f:
+            # Tolerate blank lines (e.g. a trailing newline at end of file).
+            if not raw_line.strip():
+                continue
+            example = json.loads(raw_line)
+            gold[example['id']] = [
+                [
+                    event['trigger']['start'],
+                    event['trigger']['end'],
+                    event['event_type'].replace(':', '.'),
+                    event['trigger']['text'],
+                ]
+                for event in example['events']
+            ]
+
+    return gold
+
+
+def get_vocab():
+    """Build the label <-> id lookup tables used by the scorer.
+
+    Id 0 belongs to the 'None' label; the entries of EVENT_TYPE follow in
+    their list order.
+
+    Returns:
+        A ``(label2id, id2label)`` pair of dicts that are inverse to each other.
+    """
+    all_labels = ['None', *EVENT_TYPE]
+    label2id, id2label = {}, {}
+    for idx, label in enumerate(all_labels):
+        label2id[label] = idx
+        id2label[idx] = label
+    return label2id, id2label
+
+
+def main(result_dir, gold_path):
+    """Load gold data and per-example prediction files, then print the scores.
+
+    Args:
+        result_dir: directory holding one ``<example_id>.json`` file per
+            example.  Each file is a JSON list of event objects with the keys
+            ``start_word_index``, ``end_word_index``, ``event_type``,
+            ``trigger``, ``confidence`` and ``if_reasonable``.  Entries without
+            a ``.json`` extension are ignored.
+        gold_path: gold file in the format read by ``read_gold_example``.
+    """
+    gold = read_gold_example(gold_path)
+
+    preds = {}
+    # Sorted so a run does not depend on the directory order of the OS.
+    for file in sorted(os.listdir(result_dir)):
+        # Only prediction files take part; stray entries (editor backups,
+        # .DS_Store, ...) would otherwise fail to parse or yield a bogus id.
+        example_id, ext = os.path.splitext(file)
+        if ext != '.json':
+            continue
+        file_path = os.path.join(result_dir, file)
+        with open(file_path, 'r', encoding='utf-8') as f:
+            res = json.load(f)
+        preds[example_id] = [
+            [
+                event['start_word_index'],
+                event['end_word_index'],
+                event['event_type'],
+                event['trigger'],
+                event['confidence'],
+                event['if_reasonable'],
+            ]
+            for event in res
+        ]
+
+    # Number of prediction records dropped as malformed.
+    invalid_arg_num = filter_invalid_answer(preds)
+    print(invalid_arg_num)
+    evaluate(preds, gold)
+
+
+# Label vocabularies, built once at import time; evaluate() reads LABEL2ID.
+LABEL2ID, ID2LABEL = get_vocab()
+
+if __name__ == "__main__":
+    # Default locations of the predictions and the gold file to score.
+    main(
+        result_dir='./Output/ED/Full_Testset/ED_E+_Closed',
+        gold_path='./data/ACE05-E+/ED_E+_gold.json',
+    )
+    
diff --git a/Code/ED/score_ED_E.py b/Code/ED/score_ED_E.py