Skip to content

check_implemented_domains still fails #770

@isuruf

Description

@isuruf

The following kernel still fails with:

LoopyError: sanity check failed--implemented and desired domain for instruction 'prefetch_insn2' do not match

implemented: [ntgt_boxes, isrc_box_end, isrc_box_start] -> { [itgt_box, isrc_box, inner, itgt_offset_outer = 0, iprefetch = 0, isrc_prefetch_inner = 0] : 0 <= itgt_box < ntgt_boxes and isrc_box_start <= isrc_box < isrc_box_end }

desired:[ntgt_boxes, isrc_box_end, isrc_box_start] -> { [itgt_box, isrc_box, inner, itgt_offset_outer, iprefetch, isrc_prefetch_inner] : itgt_offset_outer = 0 and iprefetch = 0 and isrc_prefetch_inner = 0 and 0 <= itgt_box < ntgt_boxes and isrc_box_start <= isrc_box < isrc_box_end and 0 <= inner <= 25 }

sample point in implemented but not desired: isrc_prefetch_inner=0, iprefetch=0, isrc_box_start=-1, isrc_box_end=0, ntgt_boxes=1, inner=-1, itgt_box=0, isrc_box=-1, itgt_offset_outer=0
gist of constraints in implemented but not desired: [ntgt_boxes, isrc_box_end, isrc_box_start] -> { [itgt_box, isrc_box, inner, itgt_offset_outer, iprefetch, isrc_prefetch_inner] : 0 <= inner <= 25 }
Details
import loopy as lp
import numpy as np
from pymbolic.primitives import *
import immutables


# Build the "p2p" kernel used to reproduce the check_implemented_domains
# failure. The call passes three positional pieces: a list of loop-domain
# strings, the instruction text, and the list of argument/temporary
# declarations, followed by the lang_version and name keywords.
p2p_knl = lp.make_kernel(
    [
    # Loop domains, one set string per entry. Several of them pin their
    # iname to the single value 0 (iknl, istrength, itgt_offset_outer,
    # iprefetch, isrc_prefetch_inner).
    "[ntgt_boxes] -> { [itgt_box] : 0 <= itgt_box < ntgt_boxes }",
    "{ [iknl] : iknl = 0 }",
    "[isrc_box_end, isrc_box_start] -> { [isrc_box] : isrc_box_start <= isrc_box < isrc_box_end }",
    "{ [idim, idim_0, idim_1] : 0 <= idim <= 1 and 0 <= idim_0 <= 1 and 0 <= idim_1 <= 1 }",
    "{ [istrength] : istrength = 0 }",
    # `inner` ranges over 0..31 here ...
    "{ [inner] : 0 <= inner <= 31 }",
    "{ [itgt_offset_outer] : itgt_offset_outer = 0 }",
    "{ [iprefetch] : iprefetch = 0 }",
    # ... while this domain takes `inner` as a parameter and restricts it to
    # 0..25. The same `0 <= inner <= 25` constraint is what the reported
    # error lists as present in the desired domain of 'prefetch_insn2' but
    # missing from the implemented one.
    "[inner] -> { [isrc_prefetch_inner] : isrc_prefetch_inner = 0 and 0 <= inner <= 25 }",
    "[iprefetch, isrc_end, isrc_start] -> { [isrc_offset] : isrc_offset >= 0 and -26iprefetch <= isrc_offset < -26iprefetch + isrc_end - isrc_start and isrc_offset <= 25 }",
    ],
    # Instruction text. Every instruction carries an explicit id and iname
    # set in its {...} options, and most list their dependencies via dep=.
    # 'prefetch_insn2' is the instruction named in the reported error.
    '''

    knl_0_scaling = (1 / 8)*3.141592653589793**(-1) {id=insn, inames=+inner:itgt_box}
    tgt_ibox = target_boxes[itgt_box] {id=insn_0, inames=inner:itgt_box}
    itgt_start = box_target_starts[tgt_ibox] {id=insn_1, dep=insn_0, inames=inner:itgt_box}
    itgt_end = itgt_start + box_target_counts_nonchild[tgt_ibox] {id=insn_2, dep=insn_0:insn_1, inames=inner:itgt_box}
    isrc_box_start = source_box_starts[itgt_box] {id=insn_3, inames=inner:itgt_box}
    isrc_box_end = source_box_starts[itgt_box + 1] {id=insn_4, inames=inner:itgt_box}
    itgt_offset = itgt_offset_outer*32 + inner {id=insn_5, inames=inner:itgt_offset_outer:itgt_box}
    itgt = itgt_offset + itgt_start {id=insn_6, dep=insn_5:insn_1, inames=inner:itgt_offset_outer:itgt_box}
    cond_itgt = itgt < itgt_end {id=insn_7, dep=insn_2:insn_6, inames=inner:itgt_offset_outer:itgt_box}
    acc[iknl] = 0 {id=init_acc, inames=iknl:inner:itgt_offset_outer:itgt_box}
    tgt_center[idim_0] = targets[idim_0, itgt] {id=prefetch_tgt, dep=insn_7:insn_6, inames=inner:itgt_offset_outer:idim_0:itgt_box}
    src_ibox = source_box_lists[isrc_box] {id=src_box_insn_0, inames=inner:itgt_offset_outer:isrc_box:itgt_box}
    isrc_start = box_source_starts[src_ibox] {id=src_box_insn_1, dep=src_box_insn_0, inames=inner:itgt_offset_outer:isrc_box:itgt_box}
    isrc_end = isrc_start + box_source_counts_nonchild[src_ibox] {id=src_box_insn_2, dep=src_box_insn_1:src_box_insn_0, inames=inner:itgt_offset_outer:isrc_box:itgt_box}
    isrc_prefetch_new = isrc_prefetch_inner*32 + inner {id=prefetch_insn1, inames=itgt_offset_outer:iprefetch:isrc_prefetch_inner:inner:isrc_box:itgt_box}
    isrc_prefetch = iprefetch*26 + isrc_prefetch_inner*32 + inner {id=prefetch_insn2, inames=itgt_offset_outer:iprefetch:isrc_prefetch_inner:inner:isrc_box:itgt_box}
    cond_isrc = isrc_prefetch < isrc_end + (-1)*isrc_start {id=prefetch_insn3, dep=prefetch_insn2:src_box_insn_2:src_box_insn_1, inames=itgt_offset_outer:iprefetch:isrc_prefetch_inner:inner:isrc_box:itgt_box}
    local_isrc[idim_1, isrc_prefetch_new] = sources[idim_1, isrc_prefetch + isrc_start] {id=prefetch_src, dep=prefetch_insn3:prefetch_insn2:prefetch_insn1:src_box_insn_1, inames=itgt_offset_outer:iprefetch:isrc_prefetch_inner:inner:idim_1:isrc_box:itgt_box}
    local_isrc[istrength + 2, isrc_prefetch_new] = strength[istrength, isrc_prefetch + isrc_start] {id=prefetch_charge, dep=prefetch_insn3:prefetch_insn2:prefetch_insn1:src_box_insn_1, inames=itgt_offset_outer:iprefetch:isrc_prefetch_inner:inner:istrength:isrc_box:itgt_box}
    isrc = isrc_offset + iprefetch*26 + isrc_start {id=insn_8, dep=insn_7:src_box_insn_1, inames=itgt_offset_outer:isrc_offset:iprefetch:inner:isrc_box:itgt_box}
    d[idim] = tgt_center[idim] + (-1)*local_isrc[idim, isrc_offset] {id=insn_9, dep=prefetch_src:insn_7:prefetch_tgt, inames=itgt_offset_outer:isrc_offset:iprefetch:inner:idim:isrc_box:itgt_box}
    strength_0 = local_isrc[2, isrc_offset] {id=insn_10, dep=insn_7:prefetch_charge, inames=itgt_offset_outer:isrc_offset:iprefetch:inner:isrc_box:itgt_box}
    expr = d[0]*d[0] + d[1]*d[1] {id=insn_11, dep=insn_7:insn_9, inames=itgt_offset_outer:isrc_offset:iprefetch:inner:isrc_box:itgt_box}
    cse_exprvar = sqrt(expr) {id=insn_12, dep=insn_7:insn_11, inames=itgt_offset_outer:isrc_offset:iprefetch:inner:isrc_box:itgt_box}
    pair_result_0 = expr*log(cse_exprvar)*strength_0 {id=insn_13, dep=insn_11:insn_12:insn_10:insn_7, inames=itgt_offset_outer:isrc_offset:iprefetch:inner:isrc_box:itgt_box}
    acc[0] = acc[0] + pair_result_0 {id=update_acc_0, dep=insn_7:init_acc:insn_13, inames=itgt_offset_outer:isrc_offset:iprefetch:inner:isrc_box:itgt_box}
    result[0, itgt] = knl_0_scaling*acc[0] {id=write_csr, dep=insn_7:update_acc_0:insn_6:insn, inames=inner:itgt_offset_outer:itgt_box}
    ''', [
        # Kernel arguments. Array extents are given symbolically through
        # pymbolic Variables naming the ValueArgs nsources, ntargets and
        # ntgt_boxes. The *_s0 / *_s1 arrays are declared here but are not
        # referenced by the instruction text above.
        lp.GlobalArg(
            name="sources", dtype=np.float64,
            shape=(2, Variable('nsources')), for_atomic=False),
        lp.GlobalArg(
            name="sources_s0", dtype=np.float64,
            shape=(Variable('nsources'),), for_atomic=False),
        lp.GlobalArg(
            name="sources_s1", dtype=np.float64,
            shape=(Variable('nsources'),), for_atomic=False),
        lp.GlobalArg(
            name="targets", dtype=np.float64,
            shape=(2, Variable('ntargets')), for_atomic=False),
        lp.GlobalArg(
            name="targets_s0", dtype=np.float64,
            shape=(Variable('ntargets'),), for_atomic=False),
        lp.GlobalArg(
            name="targets_s1", dtype=np.float64,
            shape=(Variable('ntargets'),), for_atomic=False),
        lp.ValueArg(
            name="nsources",
            dtype=np.int32),
        lp.ValueArg(
            name="ntargets",
            dtype=np.int32),
        # Index arrays declared with shape=None (no extent given).
        lp.GlobalArg(
            name="box_target_starts", dtype=np.int32,
            shape=None, for_atomic=False),
        lp.GlobalArg(
            name="box_target_counts_nonchild", dtype=np.int32,
            shape=None, for_atomic=False),
        lp.GlobalArg(
            name="box_source_starts", dtype=np.int32,
            shape=None, for_atomic=False),
        lp.GlobalArg(
            name="box_source_counts_nonchild", dtype=np.int32,
            shape=None, for_atomic=False),
        lp.GlobalArg(
            name="source_box_starts", dtype=np.int32,
            shape=None, for_atomic=False),
        lp.GlobalArg(
            name="source_box_lists", dtype=np.int32,
            shape=None, for_atomic=False),
        lp.GlobalArg(
            name="strength", dtype=np.float64,
            shape=(1, Variable('nsources')), for_atomic=False),
        lp.GlobalArg(
            name="strength_s0", dtype=np.float64,
            shape=(Variable('nsources'),), for_atomic=False),
        lp.GlobalArg(
            name="result", dtype=np.float64,
            shape=(1, Variable('ntargets')), for_atomic=False),
        lp.GlobalArg(
            name="result_s0", dtype=np.float64,
            shape=(Variable('ntargets'),), for_atomic=False),
        lp.ValueArg(
            name="ntgt_boxes",
            dtype=np.int32),
        lp.GlobalArg(
            name="target_boxes", dtype=np.int32,
            shape=(Variable('ntgt_boxes'),), for_atomic=False),
        # Temporaries assigned by the instructions above. All are declared
        # with AddressSpace.PRIVATE except local_isrc (last entry), which is
        # declared with AddressSpace.LOCAL.
        lp.TemporaryVariable(
            name="tgt_center",
            dtype=np.float64,
            shape=(2,), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="cse_exprvar",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="knl_0_scaling",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="tgt_ibox",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="itgt_start",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="itgt_end",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        # isrc_box_start / isrc_box_end also appear as parameters of the
        # isrc_box loop domain above and are assigned by insn_3 / insn_4.
        lp.TemporaryVariable(
            name="isrc_box_start",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="isrc_box_end",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="itgt_offset",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="itgt",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="cond_itgt",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="acc",
            dtype=np.float64,
            shape=(1,), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="src_ibox",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        # isrc_start / isrc_end also appear as parameters of the isrc_offset
        # loop domain above and are assigned by src_box_insn_1 / src_box_insn_2.
        lp.TemporaryVariable(
            name="isrc_start",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="isrc_end",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="isrc_prefetch_new",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="isrc_prefetch",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="cond_isrc",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="isrc",
            dtype=np.int32,
            shape=(), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="d",
            dtype=np.float64,
            shape=(2,), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="strength_0",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="expr",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        lp.TemporaryVariable(
            name="pair_result_0",
            dtype=np.float64,
            shape=(), for_atomic=False,
            address_space=lp.AddressSpace.PRIVATE,
            read_only=False,
            ),
        # Staging buffer written by prefetch_src / prefetch_charge and read by
        # insn_9 / insn_10; shape (3, 26), the only LOCAL temporary.
        lp.TemporaryVariable(
            name="local_isrc",
            dtype=np.float64,
            shape=(3, 26), for_atomic=False,
            address_space=lp.AddressSpace.LOCAL,
            read_only=False,
            ),
        ],
        lang_version=(2018, 2),
        name="p2p",
        )

# Tag the inames one call at a time, in this exact order: the three prefetch
# and target dimension inames are unrolled, itgt_box and inner receive the
# "g.0" and "l.0" tags, and idim is unrolled last.
for _iname_tag in (
        "istrength:unr",
        "idim_1:unr",
        "idim_0:unr",
        "itgt_box:g.0",
        "inner:l.0",
        "idim:unr",
):
    p2p_knl = lp.tag_inames(p2p_knl, _iname_tag)

# Wrap the single kernel into a translation unit and request its device
# code; per the issue report, this is the step that raises the LoopyError.
t_unit = lp.merge([p2p_knl])
_codegen_result = lp.generate_code_v2(t_unit)
_codegen_result.device_code()

Metadata

Metadata

Assignees

No one assigned

    Labels

    No labels
    No labels

    Projects

    No projects

    Milestone

    No milestone

    Relationships

    None yet

    Development

    No branches or pull requests

    Issue actions