# Patch summary (free text ahead of the first file header):
#
# * pyop2/configuration.py: adds an "extra_info" entry,
#   ("PYOP2_EXTRA_INFO", bool, False), next to the existing entries
#   (presumably environment variable, type, default, like its neighbours).
# * pyop2/parloop.py, _compute: when configuration["extra_info"] is set,
#   sums self.nbytes across self.comm with allreduce and has rank 0 print
#   "<global kernel name>_BYTES= <total>" before the kernel is launched.
# * pyop2/parloop.py, nbytes: new cached property that adds arg.data.nbytes
#   for every argument, plus map_.values.nbytes for each kernel argument of
#   each non-None map that has not been seen before.
#
# NOTE(review): in nbytes the amount added (map_.values.nbytes) does not
# depend on the loop variable k, so a map exposing several unseen kernel
# arguments is counted once per argument. Confirm this is intended.
# NOTE(review): argument data is not de-duplicated the way maps are, so the
# same data object passed as two arguments is counted twice. Confirm.

diff --git a/pyop2/configuration.py b/pyop2/configuration.py
index efd78071c..665ef29e9 100644
--- a/pyop2/configuration.py
+++ b/pyop2/configuration.py
@@ -118,6 +118,8 @@ class Configuration(dict):
             ("PYOP2_LDFLAGS", str, ""),
         "simd_width":
             ("PYOP2_SIMD_WIDTH", int, 1),
+        "extra_info":
+            ("PYOP2_EXTRA_INFO", bool, False),
         "vectorization_strategy":
             ("PYOP2_VECT_STRATEGY", str, "cross-element"),
         "alignment":
diff --git a/pyop2/parloop.py b/pyop2/parloop.py
index 8384268cf..7cb5cb518 100644
--- a/pyop2/parloop.py
+++ b/pyop2/parloop.py
@@ -187,6 +187,10 @@ def _compute(self, part):
 
         :arg part: The :class:`SetPartition` to compute over.
         """
+        if configuration["extra_info"]:
+            nbytes = self.comm.allreduce(self.nbytes)
+            if self.comm.Get_rank() == 0:
+                print("{0}_BYTES= {1}".format(self.global_kernel.name, nbytes))
         with self._compute_event():
             PETSc.Log.logFlops(part.size*self.num_flops)
             self.global_kernel(self.comm, part.offset, part.offset+part.size, *self.arglist)
@@ -195,6 +199,22 @@ def _compute(self, part):
     def num_flops(self):
         return self.global_kernel.num_flops(self.iterset)
 
+    @cached_property
+    def nbytes(self):
+        nbytes = 0
+        seen = set()
+        for arg in self.arguments:
+            nbytes += arg.data.nbytes
+            for map_ in arg.maps:
+                if map_ is None:
+                    continue
+                for k in map_._kernel_args_:
+                    if k in seen:
+                        continue
+                    nbytes += map_.values.nbytes
+                    seen.add(k)
+        return nbytes
+
     @mpi.collective
     def compute(self):
         # Parloop.compute is an alias for Parloop.__call__