Skip to content

Commit 3826a67

Browse files
committed
Added new loader and fixed small bugs in hyperparameters. Added training results.
1 parent 1c2080c commit 3826a67

File tree

10 files changed

+398
-7
lines changed

10 files changed

+398
-7
lines changed

README.md

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -180,7 +180,6 @@ original implementations (with proper licencing).
180180
* **[MoGAT](kgcnn/literature/MoGAT)**: [Multi-order graph attention network for water solubility prediction and interpretation](https://www.nature.com/articles/s41598-022-25701-5) by Lee et al. (2023)
181181
* **[MXMNet](kgcnn/literature/MXMNet)**: [Molecular Mechanics-Driven Graph Neural Network with Multiplex Graph for Molecular Structures](https://arxiv.org/abs/2011.07457) by Zhang et al. (2020)
182182
* **[NMPN](kgcnn/literature/NMPN)**: [Neural Message Passing for Quantum Chemistry](http://arxiv.org/abs/1704.01212) by Gilmer et al. (2017)
183-
* **[Unet](kgcnn/literature/Unet)**: [Graph U-Nets](http://proceedings.mlr.press/v97/gao19a/gao19a.pdf) by H. Gao and S. Ji (2019)
184183
* **[PAiNN](kgcnn/literature/PAiNN)**: [Equivariant message passing for the prediction of tensorial properties and molecular spectra](https://arxiv.org/pdf/2102.03150.pdf) by Schütt et al. (2020)
185184
* **[RGCN](kgcnn/literature/RGCN)**: [Modeling Relational Data with Graph Convolutional Networks](https://arxiv.org/abs/1703.06103) by Schlichtkrull et al. (2017)
186185
* **[rGIN](kgcnn/literature/rGIN)**: [Random Features Strengthen Graph Neural Networks](https://arxiv.org/abs/2002.03155) by Sato et al. (2020)

kgcnn/data/base.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -330,7 +330,8 @@ def rename_property_on_graphs(self, old_property_name: str, new_property_name: s
330330
get = obtain_property
331331

332332
def tf_disjoint_data_generator(self, inputs, outputs, **kwargs):
333-
module_logger.info("Dataloader is experimental and not fully tested nor stable.")
333+
assert isinstance(inputs, list), "Dictionary input is not yet implemented"
334+
module_logger.info("Dataloader is experimental and not fully tested or stable.")
334335
return experimental_tf_disjoint_list_generator(self, inputs=inputs, outputs=outputs, **kwargs)
335336

336337

kgcnn/io/loader.py

Lines changed: 95 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
import keras as ks
2+
from typing import Union
23
import numpy as np
34
import tensorflow as tf
45

@@ -90,3 +91,97 @@ def generator():
9091
)
9192

9293
return data_loader
94+
95+
96+
def tf_disjoint_list_generator(
        graphs,
        inputs: list,
        outputs: list,
        assignment_to_id: list = None,
        assignment_of_indices: list = None,
        flag_batch_id: list = None,
        flag_count: list = None,
        flag_subgraph_id: list = None,
        batch_size=32,
        shuffle=True
):
    r"""Build a :obj:`tf.data.Dataset` that yields batches of graphs in disjoint representation.

    Each batch takes up to `batch_size` graphs. Properties of inputs that have an assignment id are
    concatenated along the first axis, all other properties are stacked with :obj:`np.array`.
    Inputs that are flagged as batch id, count or sub-graph id are not read from the graphs but
    derived from the lengths of the first concatenated property with the matching assignment id.

    .. note::

        NOTE(review): The meaning of the `flag_*` lists is inferred from how they are indexed in this
        function (by input position, holding an assignment id). Please confirm against the callers.

    Args:
        graphs: Indexable collection of graphs that supports `len`. Each graph is subscripted with the
            property names given in `inputs` and `outputs`.
        inputs (list): One dictionary per model input with the keys "name", "shape" and "dtype".
        outputs (list): List of dictionaries, or a single dictionary, with the keys "name", "shape"
            and "dtype" for the targets.
        assignment_to_id (list): Per input either `None` (property is stacked) or an assignment id
            (property is concatenated along the first axis). Missing entries are treated as `None`.
        assignment_of_indices (list): Per input either `None` or the position of the input whose
            per-graph counts are used to offset this index property. Missing entries are treated as `None`.
        flag_batch_id (list): Per input either `None` or the assignment id for which this input
            receives the batch id of every concatenated entry.
        flag_count (list): Per input either `None` or the assignment id for which this input
            receives the number of entries per graph.
        flag_subgraph_id (list): Per input either `None` or the assignment id for which this input
            receives the running index of every entry within its graph.
        batch_size (int): Maximum number of graphs per batch. Default is 32.
        shuffle (bool): Whether to shuffle the graph order on each pass over the data. Default is True.

    Returns:
        tf.data.Dataset: Dataset that yields tuples `(inputs, outputs)`, where `inputs` is a tuple
        with one entry per element of `inputs`.
    """
    num_inputs = len(inputs)

    def _per_input(values):
        # A missing or too short list means "nothing configured" for the remaining inputs.
        values = [] if values is None else list(values)
        return values + [None] * (num_inputs - len(values))

    def _position_of_id(flags):
        # Invert `flags` (input position -> assignment id) into assignment id -> input position.
        return {ids: position for position, ids in enumerate(flags) if ids is not None}

    assignment_to_id = _per_input(assignment_to_id)
    assignment_of_indices = _per_input(assignment_of_indices)
    all_flags = [_per_input(flags) for flags in (flag_batch_id, flag_count, flag_subgraph_id)]
    where_batch, where_count, where_subgraph = [_position_of_id(flags) for flags in all_flags]

    # Only inputs without any flag are real graph properties; flagged inputs are derived below.
    is_attributes = [all(flags[i] is None for flags in all_flags) for i in range(num_inputs)]

    def generator():
        dataset_size = len(graphs)
        data_index = np.arange(dataset_size)

        if shuffle:
            np.random.shuffle(data_index)

        for batch_index in range(0, dataset_size, batch_size):
            idx = data_index[batch_index:batch_index + batch_size]
            graphs_batch = [graphs[i] for i in idx]

            # One slot per model input, since the slots are addressed by input position.
            out = [None] * num_inputs
            out_counts = [None] * num_inputs

            for i in range(num_inputs):
                if not is_attributes[i]:
                    continue

                array_list = [x[inputs[i]["name"]] for x in graphs_batch]
                ids = assignment_to_id[i]
                if ids is None:
                    out[i] = np.array(array_list, dtype=inputs[i]["dtype"])
                    continue

                out[i] = np.concatenate(array_list, axis=0)
                counts = np.array([len(x) for x in array_list], dtype="int64")
                out_counts[i] = counts

                # Fill each derived slot once, from the first property that carries this assignment id.
                position = where_count.get(ids)
                if position is not None and out[position] is None:
                    out[position] = counts
                position = where_batch.get(ids)
                if position is not None and out[position] is None:
                    out[position] = np.repeat(np.arange(len(array_list), dtype="int64"), repeats=counts)
                position = where_subgraph.get(ids)
                if position is not None and out[position] is None:
                    out[position] = np.concatenate([np.arange(x, dtype="int64") for x in counts], axis=0)

            # Indices
            for i in range(num_inputs):
                if assignment_of_indices[i] is None:
                    continue
                edge_indices_flatten = out[i]
                count_nodes = out_counts[assignment_of_indices[i]]
                count_edges = out_counts[i]
                # Shift the indices of each graph by the number of entries of all preceding graphs.
                node_splits = np.pad(np.cumsum(count_nodes), [[1, 0]])
                offset_edge_indices = np.expand_dims(np.repeat(node_splits[:-1], count_edges), axis=-1)
                disjoint_indices = edge_indices_flatten + offset_edge_indices
                out[i] = np.transpose(disjoint_indices)

            if isinstance(outputs, list):
                out_y = [
                    np.array([x[target["name"]] for x in graphs_batch], dtype=target["dtype"])
                    for target in outputs
                ]
            else:
                out_y = np.array(
                    [x[outputs["name"]] for x in graphs_batch], dtype=outputs["dtype"])

            yield tuple(out), out_y

    input_spec = tuple([tf.TensorSpec(shape=tuple([None] + list(x["shape"])), dtype=x["dtype"]) for x in inputs])

    if isinstance(outputs, list):
        output_spec = tuple([tf.TensorSpec(shape=tuple([None] + list(x["shape"])), dtype=x["dtype"]) for x in outputs])
    else:
        output_spec = tf.TensorSpec(shape=tuple([None] + list(outputs["shape"])), dtype=outputs["dtype"])

    data_loader = tf.data.Dataset.from_generator(
        generator,
        output_signature=(
            input_spec,
            output_spec
        )
    )

    return data_loader

kgcnn/literature/NMPN/__init__.py

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
from ._make import make_model, model_default
2-
# from ._make import make_crystal_model, model_crystal_default
2+
from ._make import make_crystal_model, model_crystal_default
33

44

55
__all__ = [
66
"make_model",
77
"model_default",
8-
# "make_crystal_model",
9-
# "model_crystal_default"
8+
"make_crystal_model",
9+
"model_crystal_default"
1010
]

training/hyper/hyper_mp_jdft2d.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -232,7 +232,7 @@
232232
"input_tensor_type": "ragged",
233233
'input_embedding': None,
234234
"input_node_embedding": {"input_dim": 95, "output_dim": 64},
235-
"input_edge_embedding": {"input_dim": 100, "output_dim": 64},
235+
# "input_edge_embedding": {"input_dim": 100, "output_dim": 64},
236236
"make_distance": True, "expand_distance": True,
237237
'gauss_args': {"bins": 25, "distance": 5, "offset": 0.0, "sigma": 0.4},
238238
'meg_block_args': {'node_embed': [64, 32, 32], 'edge_embed': [64, 32, 32],

training/hyper/hyper_qm9_energies.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -94,7 +94,7 @@
9494
"input_tensor_type": "ragged",
9595
"input_embedding": None,
9696
"input_node_embedding": {"input_dim": 10, "output_dim": 16},
97-
"input_edge_embedding": {"input_dim": 100, "output_dim": 64},
97+
# "input_edge_embedding": {"input_dim": 100, "output_dim": 64},
9898
"gauss_args": {"bins": 20, "distance": 4, "offset": 0.0, "sigma": 0.4},
9999
"meg_block_args": {"node_embed": [64, 32, 32], "edge_embed": [64, 32, 32],
100100
"env_embed": [64, 32, 32], "activation": "kgcnn>softplus2"},
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
OS: posix_linux
2+
backend: tensorflow
3+
cuda_available: 'True'
4+
data_unit: meV/atom
5+
date_time: '2023-12-15 14:11:18'
6+
device_id: '[LogicalDevice(name=''/device:CPU:0'', device_type=''CPU''), LogicalDevice(name=''/device:GPU:0'',
7+
device_type=''GPU'')]'
8+
device_memory: '[]'
9+
device_name: '[{}, {''compute_capability'': (8, 0), ''device_name'': ''NVIDIA A100
10+
80GB PCIe''}]'
11+
epochs:
12+
- 1000
13+
- 1000
14+
- 1000
15+
- 1000
16+
- 1000
17+
execute_folds: null
18+
kgcnn_version: 4.0.0
19+
learning_rate:
20+
- 1.1979999726463575e-05
21+
- 1.1979999726463575e-05
22+
- 1.1979999726463575e-05
23+
- 1.1979999726463575e-05
24+
- 1.1979999726463575e-05
25+
loss:
26+
- 0.0164102204144001
27+
- 0.049058035016059875
28+
- 0.019465263932943344
29+
- 0.04565507546067238
30+
- 0.018842527642846107
31+
max_learning_rate:
32+
- 0.0010000000474974513
33+
- 0.0010000000474974513
34+
- 0.0010000000474974513
35+
- 0.0010000000474974513
36+
- 0.0010000000474974513
37+
max_loss:
38+
- 0.4873706102371216
39+
- 0.4823596477508545
40+
- 0.4699196219444275
41+
- 0.44965362548828125
42+
- 0.46280232071876526
43+
max_scaled_mean_absolute_error:
44+
- 70.67137908935547
45+
- 67.2155532836914
46+
- 64.04151916503906
47+
- 57.88063049316406
48+
- 55.339359283447266
49+
max_scaled_root_mean_squared_error:
50+
- 152.97747802734375
51+
- 145.45938110351562
52+
- 142.39430236816406
53+
- 133.3281707763672
54+
- 124.1724624633789
55+
max_val_loss:
56+
- 0.26619401574134827
57+
- 0.3685653507709503
58+
- 0.40042510628700256
59+
- 0.6030166745185852
60+
- 0.6730947494506836
61+
max_val_scaled_mean_absolute_error:
62+
- 38.59397888183594
63+
- 51.37816619873047
64+
- 54.60137939453125
65+
- 77.64346313476562
66+
- 80.49958038330078
67+
max_val_scaled_root_mean_squared_error:
68+
- 83.75748443603516
69+
- 116.78331756591797
70+
- 139.4757537841797
71+
- 208.695556640625
72+
- 203.95716857910156
73+
min_learning_rate:
74+
- 1.1979999726463575e-05
75+
- 1.1979999726463575e-05
76+
- 1.1979999726463575e-05
77+
- 1.1979999726463575e-05
78+
- 1.1979999726463575e-05
79+
min_loss:
80+
- 0.0164102204144001
81+
- 0.049058035016059875
82+
- 0.019465263932943344
83+
- 0.04565083235502243
84+
- 0.018809372559189796
85+
min_scaled_mean_absolute_error:
86+
- 2.3664445877075195
87+
- 6.843836784362793
88+
- 2.6597533226013184
89+
- 5.826504707336426
90+
- 2.251401424407959
91+
min_scaled_root_mean_squared_error:
92+
- 12.384324073791504
93+
- 72.17166900634766
94+
- 12.613103866577148
95+
- 70.442138671875
96+
- 11.607565879821777
97+
min_val_loss:
98+
- 0.1912095546722412
99+
- 0.27041757106781006
100+
- 0.34239882230758667
101+
- 0.4209592342376709
102+
- 0.5613946318626404
103+
min_val_scaled_mean_absolute_error:
104+
- 27.72239875793457
105+
- 37.69633102416992
106+
- 46.68900680541992
107+
- 54.20203399658203
108+
- 67.14065551757812
109+
min_val_scaled_root_mean_squared_error:
110+
- 50.73049545288086
111+
- 101.22430419921875
112+
- 123.49358367919922
113+
- 149.4101104736328
114+
- 180.5041961669922
115+
model_class: make_crystal_model
116+
model_name: CGCNN
117+
model_version: '2023-11-28'
118+
multi_target_indices: null
119+
number_histories: 5
120+
scaled_mean_absolute_error:
121+
- 2.3664445877075195
122+
- 6.843836784362793
123+
- 2.6597533226013184
124+
- 5.827065944671631
125+
- 2.255373477935791
126+
scaled_root_mean_squared_error:
127+
- 12.439318656921387
128+
- 72.18438720703125
129+
- 12.624910354614258
130+
- 70.45165252685547
131+
- 11.619587898254395
132+
seed: 42
133+
time_list:
134+
- '0:01:59.293506'
135+
- '0:01:55.045981'
136+
- '0:01:54.909115'
137+
- '0:01:54.808906'
138+
- '0:01:57.223260'
139+
val_loss:
140+
- 0.25275886058807373
141+
- 0.29345396161079407
142+
- 0.3919019401073456
143+
- 0.5991652011871338
144+
- 0.6718166470527649
145+
val_scaled_mean_absolute_error:
146+
- 36.646087646484375
147+
- 40.90761184692383
148+
- 53.439170837402344
149+
- 77.14754486083984
150+
- 80.34671783447266
151+
val_scaled_root_mean_squared_error:
152+
- 79.33216857910156
153+
- 105.15802001953125
154+
- 138.79078674316406
155+
- 180.42710876464844
156+
- 199.3755645751953
Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
{"model": {"class_name": "make_crystal_model", "module_name": "kgcnn.literature.CGCNN", "config": {"name": "CGCNN", "inputs": [{"shape": [null], "name": "node_number", "dtype": "int64", "ragged": true}, {"shape": [null, 3], "name": "node_frac_coordinates", "dtype": "float64", "ragged": true}, {"shape": [null, 2], "name": "range_indices", "dtype": "int64", "ragged": true}, {"shape": [null, 3], "name": "range_image", "dtype": "float32", "ragged": true}, {"shape": [3, 3], "name": "graph_lattice", "dtype": "float64", "ragged": false}], "input_tensor_type": "ragged", "input_node_embedding": {"input_dim": 95, "output_dim": 64}, "representation": "unit", "expand_distance": true, "make_distances": true, "gauss_args": {"bins": 60, "distance": 6, "offset": 0.0, "sigma": 0.4}, "conv_layer_args": {"units": 128, "activation_s": "kgcnn>shifted_softplus", "activation_out": "kgcnn>shifted_softplus", "batch_normalization": true}, "node_pooling_args": {"pooling_method": "scatter_mean"}, "depth": 4, "output_mlp": {"use_bias": [true, true, false], "units": [128, 64, 1], "activation": ["kgcnn>shifted_softplus", "kgcnn>shifted_softplus", "linear"]}}}, "training": {"cross_validation": {"class_name": "KFold", "config": {"n_splits": 5, "random_state": 42, "shuffle": true}}, "fit": {"batch_size": 128, "epochs": 1000, "validation_freq": 10, "verbose": 2, "callbacks": [{"class_name": "kgcnn>LinearLearningRateScheduler", "config": {"learning_rate_start": 0.001, "learning_rate_stop": 1e-05, "epo_min": 500, "epo": 1000, "verbose": 0}}]}, "compile": {"optimizer": {"class_name": "Adam", "config": {"learning_rate": 0.001}}, "loss": "mean_absolute_error"}, "scaler": {"class_name": "StandardLabelScaler", "module_name": "kgcnn.data.transform.scaler.standard", "config": {"with_std": true, "with_mean": true, "copy": true}}}, "data": {"data_unit": "meV/atom"}, "info": {"postfix": "", "postfix_file": "", "kgcnn_version": "4.0.0"}, "dataset": {"class_name": "MatProjectJdft2dDataset", "module_name": 
"kgcnn.data.datasets.MatProjectJdft2dDataset", "config": {}, "methods": [{"map_list": {"method": "set_range_periodic", "max_distance": 6.0}}]}}

0 commit comments

Comments
 (0)