Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion devito/arch/compiler.py
Original file line number Diff line number Diff line change
Expand Up @@ -769,7 +769,7 @@ def __init_finalize__(self, **kwargs):
if not configuration['safe-math']:
self.cflags.append('--use_fast_math')

if configuration['profiling'] == 'advanced2':
if configuration['profiling'] in ('advanced2', 'ncu'):
# Optionally print out per-kernel shared memory and register usage
self.cflags.append('--ptxas-options=-v')

Expand Down
8 changes: 6 additions & 2 deletions devito/core/cpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,8 @@
from devito.operator.operator import rcompile
from devito.passes import stream_dimensions
from devito.passes.clusters import (
Lift, blocking, buffering, cire, cse, factorize, fission, fuse, optimize_hyperplanes,
optimize_pows
Lift, apply_par_tiles, blocking, buffering, cire, cse, factorize, fission, fuse,
optimize_hyperplanes, optimize_pows
)
from devito.passes.equations import collect_derivatives
from devito.passes.iet import (
Expand Down Expand Up @@ -67,6 +67,7 @@ def _normalize_kwargs(cls, **kwargs):
reduce=oo.pop('par-tile-reduce', None))

# CIRE
o['cire-block-temps'] = oo.pop('cire-block-temps', cls.CIRE_BLOCK_TEMPS)
o['min-storage'] = oo.pop('min-storage', False)
o['cire-rotate'] = oo.pop('cire-rotate', False)
o['cire-maxpar'] = oo.pop('cire-maxpar', False)
Expand Down Expand Up @@ -198,6 +199,9 @@ def _specialize_clusters(cls, clusters, **kwargs):
if options['blocklazy']:
clusters = blocking(clusters, sregistry, options)

# Unfold the `par-tile`s, if any
clusters = apply_par_tiles(clusters, **kwargs)

return clusters

@classmethod
Expand Down
12 changes: 9 additions & 3 deletions devito/core/gpu.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,8 @@
from devito.operator.operator import rcompile
from devito.passes import is_on_device, stream_dimensions
from devito.passes.clusters import (
Lift, blocking, buffering, cire, cse, factorize, fission, fuse, memcpy_prefetch,
optimize_pows, tasking
Lift, apply_par_tiles, blocking, buffering, cire, cse, factorize, fission, fuse,
memcpy_prefetch, optimize_pows, tasking
)
from devito.passes.equations import collect_derivatives
from devito.passes.iet import (
Expand Down Expand Up @@ -38,7 +38,9 @@

class DeviceOperatorMixin:

# Overrides the default values in the main Operator class
BLOCK_LEVELS = 0
CIRE_BLOCK_TEMPS = False

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Is it intended to be a default or enforced?

Copy link
Copy Markdown
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Enforced; otherwise things would break.

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Then it needs to be moved to line 81 below, and it must be prevented from being a user option (i.e., no `oo.pop`).

MPI_MODES = (True, 'basic',)

GPU_FIT = 'all-fallback'
Expand Down Expand Up @@ -76,9 +78,10 @@ def _normalize_kwargs(cls, **kwargs):
o['skewing'] = oo.pop('skewing', False)

# CIRE
o['cire-block-temps'] = oo.pop('cire-block-temps', cls.CIRE_BLOCK_TEMPS)
o['min-storage'] = False
o['cire-rotate'] = False
o['cire-maxpar'] = oo.pop('cire-maxpar', True)
o['cire-maxpar'] = oo.pop('cire-maxpar', 'basic')
o['cire-ftemps'] = oo.pop('cire-ftemps', False)
o['cire-mingain'] = oo.pop('cire-mingain', cls.CIRE_MINGAIN)
o['cire-minmem'] = oo.pop('cire-minmem', cls.CIRE_MINMEM)
Expand Down Expand Up @@ -239,6 +242,9 @@ def _specialize_clusters(cls, clusters, **kwargs):
if options['blocklazy']:
clusters = blocking(clusters, sregistry, options)

# Unfold the `par-tile`s, if any
clusters = apply_par_tiles(clusters, **kwargs)

return clusters

@classmethod
Expand Down
10 changes: 10 additions & 0 deletions devito/core/operator.py
Original file line number Diff line number Diff line change
Expand Up @@ -58,6 +58,13 @@ class BasicOperator(Operator):
situations where the performance impact might be detrimental.
"""

CIRE_BLOCK_TEMPS = True
"""
If an aliasing expression is computed within a blocked loop nest, all CIRE-
generated temporaries will inherit the block shape. If set to False, the
temporaries shape will systematically be defined by the root Dimensions.
"""

CIRE_MINGAIN = 10
"""
Minimum operation count reduction for a redundant expression to be optimized
Expand Down Expand Up @@ -240,6 +247,9 @@ def _check_kwargs(cls, **kwargs):
if oo['mpi'] and oo['mpi'] not in cls.MPI_MODES:
raise InvalidOperator(f"Unsupported MPI mode `{oo['mpi']}`")

if oo['cire-maxpar'] not in (False, 'basic', 'compact'):
raise InvalidOperator("Illegal `cire-maxpar` value")

if oo['cse-algo'] not in ('basic', 'smartsort', 'advanced'):
raise InvalidOperator("Illegal `cse-algo` value")

Expand Down
12 changes: 10 additions & 2 deletions devito/finite_differences/differentiable.py
Original file line number Diff line number Diff line change
Expand Up @@ -985,8 +985,9 @@ def value(self, idx):
class IndexDerivative(IndexSum):

__rargs__ = ('expr', 'mapper')
__rkwargs__ = IndexSum.__rkwargs__ + ('deriv_order',)

def __new__(cls, expr, mapper, **kwargs):
def __new__(cls, expr, mapper, deriv_order=None, **kwargs):
dimensions = as_tuple(set(mapper.values()))

# Detect the Weights among the arguments
Expand All @@ -1008,6 +1009,8 @@ def __new__(cls, expr, mapper, **kwargs):
obj._weights = weights
obj._mapper = frozendict(mapper)

obj._deriv_order = deriv_order

return obj

def _hashable_content(self):
Expand Down Expand Up @@ -1040,6 +1043,10 @@ def weights(self):
def mapper(self):
return self._mapper

@property
def deriv_order(self):
    """The `deriv_order` supplied at construction time (None if not given)."""
    return self._deriv_order

@property
def depth(self):
iderivs = self.expr.find(IndexDerivative)
Expand Down Expand Up @@ -1216,7 +1223,8 @@ def _diff2sympy(obj):

# Handle special objects
if isinstance(obj, DiffDerivative):
return IndexDerivative(*args, obj.mapper), True
return IndexDerivative(*args, obj.mapper,
deriv_order=obj.deriv_order), True

# Handle generic objects such as arithmetic operations
try:
Expand Down
4 changes: 3 additions & 1 deletion devito/finite_differences/finite_difference.py
Original file line number Diff line number Diff line change
Expand Up @@ -210,7 +210,9 @@ def make_derivative(expr, dim, fd_order, deriv_order, side, matvec, x0, coeffici
with suppress(AttributeError):
expr = expr._evaluate(expand=False)

deriv = DiffDerivative(expr*weights, {dim: indices.free_dim})
deriv = DiffDerivative(
expr*weights, {dim: indices.free_dim}, deriv_order=deriv_order
)
else:
terms = []
for i, c in zip(indices, weights, strict=True):
Expand Down
93 changes: 50 additions & 43 deletions devito/ir/clusters/cluster.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,7 +13,7 @@
)
from devito.mpi.halo_scheme import HaloScheme, HaloTouch
from devito.mpi.reduction_scheme import DistReduce
from devito.symbolics import estimate_cost
from devito.symbolics import estimate_cost, uxreplace
from devito.tools import (
CacheInstances, as_tuple, cached_hash, filter_ordered, flatten, infer_dtype
)
Expand Down Expand Up @@ -112,7 +112,7 @@ def dimensions(self):
@cached_property
def exprs_dimensions(self):
"""
The Dimensions that appear explicitly in the Cluster expressions.
The Dimensions that appear explicitly in the expressions.
"""
dims_explicit = {i for i in self.free_symbols if i.is_Dimension}
dims_implicit = {d for e in self.exprs for d in e.implicit_dims}
Expand All @@ -121,7 +121,7 @@ def exprs_dimensions(self):
@cached_property
def guards_dimensions(self):
"""
The Dimensions that appear explicitly in the Cluster guards.
The Dimensions that appear explicitly in the guards.
"""
syms_guards = {d for e in self.guards.values() for d in e.free_symbols}
dims_guards = {i for i in syms_guards if i.is_Dimension}
Expand All @@ -142,7 +142,7 @@ def used_dimensions(self):
@cached_property
def dist_dimensions(self):
"""
The Cluster's distributed Dimensions.
The distributed Dimensions.
"""
ret = set()
for f in self.functions:
Expand All @@ -168,7 +168,7 @@ def grid(self):
elif len(grids) == 1:
return grids.pop()
else:
raise ValueError("Cluster has no unique Grid")
raise ValueError("Multiple Grids detected")

@cached_property
def is_scalar(self):
Expand Down Expand Up @@ -296,31 +296,27 @@ def is_glb_load_to_mem_shared(self):
@cached_property
def is_async(self):
"""
True if an asynchronous Cluster, False otherwise.
True if asynchronous, False otherwise.
"""
return any(isinstance(s, (WithLock, PrefetchUpdate))
for s in flatten(self.syncs.values()))

@cached_property
def is_wait(self):
"""
True if a Cluster waiting on a lock (that is a special synchronization
operation), False otherwise.
True if waiting on a lock (that is a special synchronization operation),
False otherwise.
"""
return any(isinstance(s, WaitLock)
for s in flatten(self.syncs.values()))

@cached_property
def dtype(self):
"""
The arithmetic data type of the Cluster.
The arithmetic data type of the enclosed expressions.

If the Cluster performs floating point arithmetic, then the expressions
performing integer arithmetic are ignored, assuming that they are only
carrying out array index calculations.

If two expressions perform calculations with different precision,
the data type with highest precision is returned.
If two expressions perform calculations with different precision, the data
type with highest precision is returned.
"""
dtypes = set()
for i in self.exprs:
Expand All @@ -336,8 +332,8 @@ def dtype(self):
@cached_property
def dspace(self):
"""
Derive the DataSpace of the Cluster from its expressions,
IterationSpace, and Guards.
The DataSpace deriving from the enclosed expressions, IterationSpace,
and Guards.
"""
accesses = detect_accesses(self.exprs)

Expand Down Expand Up @@ -421,8 +417,8 @@ def ops(self):
@cached_property
def traffic(self):
"""
The Cluster compulsory traffic (number of reads/writes), as a mapper
from Functions to IntervalGroups.
The compulsory traffic (number of reads/writes), as a mapper from
Functions to IntervalGroups.

Notes
-----
Expand Down Expand Up @@ -509,30 +505,6 @@ def __getattr__(self, name):
raise AttributeError(name) from None
return getattr(block, name)

@property
def exprs(self):
return self._block.exprs

@property
def ispace(self):
return self._block.ispace

@property
def guards(self):
return self._block.guards

@property
def properties(self):
return self._block.properties

@property
def syncs(self):
return self._block.syncs

@property
def halo_scheme(self):
return self._block.halo_scheme

@classmethod
def from_clusters(cls, *clusters):
"""
Expand Down Expand Up @@ -612,6 +584,33 @@ def rebuild(self, *args, **kwargs):
syncs=syncs,
halo_scheme=halo_scheme)

def subs(self, mapper, compact=()):
    """
    Build a new Cluster applying substitution rules to `self`.

    Parameters
    ----------
    mapper : dict-like
        The substitution rules. If empty, `self` is returned as is.
    compact : iterable, optional
        Keys `d` into `self.ispace`; each `d` is additionally replaced, in
        the expressions, by `self.ispace[d].promote(...).dim`, where the
        promotion is driven by the `is_Block` predicate. Defaults to empty.

    Raises
    ------
    NotImplementedError
        If `self` carries a (truthy) `halo_scheme`.

    Notes
    -----
    The returned object is built without a `halo_scheme`.
    """
    if not mapper:
        return self

    if self.halo_scheme:
        raise NotImplementedError

    # Predicate selecting the Block entries; reused for all promotions below
    key0 = lambda i: i.is_Block
    subs0 = {d: self.ispace[d].promote(key0).dim for d in compact}

    # The expressions see both the user-provided and the `compact` rules;
    # on a key clash, the `compact` rule wins as it is unpacked last
    subs = {**mapper, **subs0}
    exprs = [uxreplace(e, subs) for e in self.exprs]

    # The IterationSpace only receives `mapper`; the `compact` rules are
    # instead applied through a promotion restricted to the Block entries
    # found in the `_defines` of the `compact` keys
    ispace = self.ispace.switch(mapper)
    key = lambda i: key0(i) and i in flatten(d._defines for d in subs0)
    ispace = ispace.promote(key, mode='total')

    guards = self.guards.subs(mapper).promote(subs0)
    properties = self.properties.subs(mapper).promote(subs0)
    syncs = self.syncs.subs(mapper)

    return self.__class__(exprs=exprs, ispace=ispace, guards=guards,
                          properties=properties, syncs=syncs)


class ClusterGroup(tuple):

Expand Down Expand Up @@ -691,6 +690,14 @@ def dspace(self):
"""Return the DataSpace of this ClusterGroup."""
return DataSpace.union(*[i.dspace.reset() for i in self])

@property

Copy link
Copy Markdown
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should this be cached?

def is_dense(self):
    """True if every element of this ClusterGroup is dense."""
    return all(i.is_dense for i in self)

@property
def is_wild(self):
    """True if every element of this ClusterGroup is wild."""
    return all(i.is_wild for i in self)

@property
def is_halo_touch(self):
    """True if every element of this ClusterGroup is a halo touch."""
    return all(i.is_halo_touch for i in self)
Expand Down
21 changes: 12 additions & 9 deletions devito/ir/clusters/visitors.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,15 +37,7 @@ def _make_key(self, cluster, level):
assert self._q_ispace_in_key
ispace = cluster.ispace[:level]

if self._q_guards_in_key:
try:
guards = tuple(cluster.guards.get(i.dim) for i in ispace)
except AttributeError:
# `cluster` is actually a ClusterGroup
assert len(cluster.guards) == 1
guards = tuple(cluster.guards[0].get(i.dim) for i in ispace)
else:
guards = None
guards = self._make_key_guards(cluster, ispace)

if self._q_properties_in_key:
properties = cluster.properties.drop(cluster.ispace[level:].itdims)
Expand All @@ -68,6 +60,17 @@ def _make_key(self, cluster, level):

return (prefix,) + subkey

def _make_key_guards(self, cluster, ispace):
if not self._q_guards_in_key:
return None

try:
return tuple(cluster.guards.get(i.dim) for i in ispace)
except AttributeError:
# `cluster` is actually a ClusterGroup
assert len(cluster.guards) == 1
return tuple(cluster.guards[0].get(i.dim) for i in ispace)

def _make_key_hook(self, cluster, level):
    """
    Return an empty tuple, ignoring both `cluster` and `level`.

    NOTE(review): presumably an extension point that subclasses override to
    contribute extra entries to the key -- confirm against `_make_key`.
    """
    return ()

Expand Down
Loading
Loading