-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathencoding.py
More file actions
109 lines (86 loc) · 3.31 KB
/
encoding.py
File metadata and controls
109 lines (86 loc) · 3.31 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Encoding of Graph as Text
@author: Anonymous Authors
"""
import json
from llm import (
MODEL_LLAMA,
MODEL_DEEPSEEK,
MODEL_MISTRAL,
MODEL_PHI,
MODEL_GEMMA,
MODEL_QWEN,
create_completion,
stop_model
)
###############################################################################
def encode_graph_to_text(graph):
    """
    Render the nodes and edges of a graph as plain text, one element per line.

    All node lines come first, followed by all edge lines, each group in the
    order in which the graph yields its elements.

    Args:
        graph (ScopeAwarePropertyGraph): Object whose ``nodes(data=True)``
            yields ``(node_id, attributes)`` pairs and whose
            ``edges(data=True)`` yields ``(source, target, attributes)``
            triples; ``attributes`` must offer ``.items()``.
    Returns:
        str: Lines of the form ``Node <id>: k: v, ...`` and
        ``Edge <source> -> <target>: k: v, ...`` joined by newlines
        (an empty string when the graph yields no nodes and no edges).
    """
    def _format_attributes(attributes):
        # "key: value" pairs separated by ", "; empty mapping -> empty string.
        return ", ".join(f"{name}: {val}" for name, val in attributes.items())

    node_lines = [
        f"Node {node_id}: {_format_attributes(attrs)}"
        for node_id, attrs in graph.nodes(data=True)
    ]
    edge_lines = [
        f"Edge {src} -> {dst}: {_format_attributes(attrs)}"
        for src, dst, attrs in graph.edges(data=True)
    ]
    return "\n".join(node_lines + edge_lines)
def encode_graph_to_text_with_llm(model, graph):
    """
    Ask an LLM to rephrase the Node/Edge text of a graph as natural language.

    The graph is first flattened with ``encode_graph_to_text``; that text is
    sent to ``create_completion`` together with a fixed system prompt.

    Args:
        model: Model identifier forwarded unchanged to ``create_completion``
            (presumably one of the ``MODEL_*`` constants from ``llm`` --
            confirm against that module).
        graph (ScopeAwarePropertyGraph): The graph to encode.
    Returns:
        tuple: ``(human_text, response_json)`` where ``human_text`` is the
        ``content`` attribute of the completion response and
        ``response_json`` is the result of calling
        ``model_dump_json(indent=2)`` on that response.
    """
    graph_text = encode_graph_to_text(graph)

    # One sentence per entry; joined with single spaces to form the prompt.
    instruction_sentences = (
        "You are an intelligent AI assistant.",
        "The user will provide you with a graph in Node/Edge representation.",
        "Your task is to describe it into natural language sentences.",
        (
            "You may use original labels in brackets, but the sentences "
            "should be proper English sentences without too much syntactic "
            "clutter."
        ),
        "Node IDs should be preserved (in brackets).",
        "You do not output anything else nor any kind of explanation.",
    )
    system_prompt = " ".join(instruction_sentences)
    user_prompt = (
        f"Describe the following graph in a natural language:\n---\n{graph_text}\n"
    )

    completion = create_completion(model, user_prompt, system_prompt)
    # Serialise the full response first, then pull out the text answer.
    response_json = completion.model_dump_json(indent=2)
    return completion.content, response_json
def encode_graph_to_text_with_template(record, template, variables):
    """
    Fill a positional format template with values looked up in a nested record.

    Args:
        record: Mapping providing ``.copy()`` and ``.get()``; nested levels
            are reached by calling ``.get()`` on each intermediate value.
        template (str): Format string with positional placeholders; it is
            filled via ``template.format(*values)``.
        variables: Iterable of key sequences. Each sequence is a path of keys
            followed from the top of ``record``; the value found at the end of
            the i-th path becomes the i-th positional format argument.
    Returns:
        str: The formatted template. A key missing at the last step of a path
        contributes ``None``; a key missing earlier makes the next ``.get``
        call fail on ``None`` with ``AttributeError``.
    """
    def _resolve(key_path):
        # Walk down from a shallow copy of the record, one key per level.
        current = record.copy()
        for step in key_path:
            current = current.get(step)
        return current

    resolved_values = [_resolve(key_path) for key_path in variables]
    return template.format(*resolved_values)
###############################################################################
if __name__ == "__main__":
    # Manual smoke run: load one pickled inconsistency and push it through
    # all three encoders defined in this module.
    import pickle
    # Not referenced by name below; presumably imported so the pickled graph
    # objects can be resolved when loading -- TODO confirm.
    from graph import PropertyGraph

    # NOTE: pickle.load can execute arbitrary code embedded in the file;
    # only run this against a trusted, locally produced inconsistencies.pkl.
    with open("inconsistencies.pkl", "rb") as f:
        inconsistencies = pickle.load(f)

    ic_template = inconsistencies["template"]
    graph_record = inconsistencies["record"][0]
    sapg = inconsistencies["graph"][0]

    sapg_text = encode_graph_to_text(sapg)

    # encode_graph_to_text_with_llm takes (model, graph); the model argument
    # was missing. MODEL_LLAMA is an arbitrary pick among the imported
    # MODEL_* constants -- swap in whichever backend should be exercised.
    sapg_llm_text, response_json = encode_graph_to_text_with_llm(MODEL_LLAMA, sapg)

    # encode_graph_to_text_with_template takes (record, template, variables);
    # the variables argument was missing.
    # NOTE(review): the "variables" key is an assumption about the pickle's
    # layout -- confirm against the code that writes inconsistencies.pkl.
    ic_variables = inconsistencies["variables"]
    sapg_template_text = encode_graph_to_text_with_template(
        graph_record, ic_template, ic_variables
    )