Thanks for your great work! When I retrained your model, I encountered a gradient explosion, even though I used the same training data as you. Could you please tell me why this problem happens?
This is the output:
Epoch 18 Valid
, psnr 21.02926874
loss = 0.05985299
Traceback (most recent call last):
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1334, in _do_call
return fn(*args)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1319, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1407, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Nan in summary histogram for: autoencoder/encoder/en_3/conv/weight_1
[[{{node autoencoder/encoder/en_3/conv/weight_1}}]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "network_train.py", line 194, in <module>
[summary_merge, ae_net['denoised_1_bd'], loss_grid_L1, train_step1], feed_dict)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 929, in run
run_metadata_ptr)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1152, in _run
feed_dict_tensor, options, run_metadata)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1328, in _do_run
run_metadata)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1348, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Nan in summary histogram for: autoencoder/encoder/en_3/conv/weight_1
[[node autoencoder/encoder/en_3/conv/weight_1 (defined at /mnt/d/Compares/mr-kpcn/MultiResolutionKernelPredictionCNN/network.py:180) ]]
Caused by op 'autoencoder/encoder/en_3/conv/weight_1', defined at:
File "network_train.py", line 129, in <module>
ae_net = model.inference()
File "/mnt/d/Compares/mr-kpcn/MultiResolutionKernelPredictionCNN/network.py", line 31, in inference
kernel_1, kernel_2, kernel_3 = self.autoencoder(ae_input, net_name="ae")
File "/mnt/d/Compares/mr-kpcn/MultiResolutionKernelPredictionCNN/network.py", line 112, in autoencoder
en_conv_out_3 = self.conv_layer(current_input, 48)
File "/mnt/d/Compares/mr-kpcn/MultiResolutionKernelPredictionCNN/network.py", line 180, in conv_layer
tf.summary.histogram('weight', W)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/summary/summary.py", line 177, in histogram
tag=tag, values=values, name=scope)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/ops/gen_logging_ops.py", line 312, in histogram_summary
"HistogramSummary", tag=tag, values=values, name=name)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
op_def=op_def)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3300, in create_op
op_def=op_def)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1801, in __init__
self._traceback = tf_stack.extract_stack()
InvalidArgumentError (see above for traceback): Nan in summary histogram for: autoencoder/encoder/en_3/conv/weight_1
[[node autoencoder/encoder/en_3/conv/weight_1 (defined at /mnt/d/Compares/mr-kpcn/MultiResolutionKernelPredictionCNN/network.py:180) ]]
Thanks for your great work! When I retrained your model, I encountered a gradient explosion, even though I used the same training data as you. Could you please tell me why this problem happens?
This is the output:
Epoch 18 Valid
, psnr 21.02926874
loss = 0.05985299
Traceback (most recent call last):
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1334, in _do_call
return fn(*args)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1319, in _run_fn
options, feed_dict, fetch_list, target_list, run_metadata)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1407, in _call_tf_sessionrun
run_metadata)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Nan in summary histogram for: autoencoder/encoder/en_3/conv/weight_1
[[{{node autoencoder/encoder/en_3/conv/weight_1}}]]
During handling of the above exception, another exception occurred:
Traceback (most recent call last):
File "network_train.py", line 194, in <module>
[summary_merge, ae_net['denoised_1_bd'], loss_grid_L1, train_step1], feed_dict)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 929, in run
run_metadata_ptr)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1152, in _run
feed_dict_tensor, options, run_metadata)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1328, in _do_run
run_metadata)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/client/session.py", line 1348, in _do_call
raise type(e)(node_def, op, message)
tensorflow.python.framework.errors_impl.InvalidArgumentError: Nan in summary histogram for: autoencoder/encoder/en_3/conv/weight_1
[[node autoencoder/encoder/en_3/conv/weight_1 (defined at /mnt/d/Compares/mr-kpcn/MultiResolutionKernelPredictionCNN/network.py:180) ]]
Caused by op 'autoencoder/encoder/en_3/conv/weight_1', defined at:
File "network_train.py", line 129, in <module>
ae_net = model.inference()
File "/mnt/d/Compares/mr-kpcn/MultiResolutionKernelPredictionCNN/network.py", line 31, in inference
kernel_1, kernel_2, kernel_3 = self.autoencoder(ae_input, net_name="ae")
File "/mnt/d/Compares/mr-kpcn/MultiResolutionKernelPredictionCNN/network.py", line 112, in autoencoder
en_conv_out_3 = self.conv_layer(current_input, 48)
File "/mnt/d/Compares/mr-kpcn/MultiResolutionKernelPredictionCNN/network.py", line 180, in conv_layer
tf.summary.histogram('weight', W)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/summary/summary.py", line 177, in histogram
tag=tag, values=values, name=scope)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/ops/gen_logging_ops.py", line 312, in histogram_summary
"HistogramSummary", tag=tag, values=values, name=name)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/framework/op_def_library.py", line 788, in _apply_op_helper
op_def=op_def)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/util/deprecation.py", line 507, in new_func
return func(*args, **kwargs)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 3300, in create_op
op_def=op_def)
File "/home/xym/anaconda3/envs/nbgd/lib/python3.6/site-packages/tensorflow/python/framework/ops.py", line 1801, in __init__
self._traceback = tf_stack.extract_stack()
InvalidArgumentError (see above for traceback): Nan in summary histogram for: autoencoder/encoder/en_3/conv/weight_1
[[node autoencoder/encoder/en_3/conv/weight_1 (defined at /mnt/d/Compares/mr-kpcn/MultiResolutionKernelPredictionCNN/network.py:180) ]]