From c8d75519030b24388b177572655eb2c3fd36e818 Mon Sep 17 00:00:00 2001 From: Dave Stevenson Date: Tue, 13 Jan 2026 18:15:33 +0000 Subject: [PATCH] drm/vc4: Skip input lines when doing a large downscale The TPZ filter ideally wants 3 lines of image to give good-quality downscaling. Reading more than that consumes excessive SDRAM bandwidth for no gain. If the downsample factor allows for it, reduce the programmed image height and increase the pitch to compensate. This does not currently handle T-format images, which need to be configured slightly differently. Signed-off-by: Dave Stevenson --- drivers/gpu/drm/vc4/vc4_drv.h | 5 +++ drivers/gpu/drm/vc4/vc4_plane.c | 63 ++++++++++++++++++++++++++------- drivers/gpu/drm/vc4/vc4_regs.h | 4 +++ 3 files changed, 59 insertions(+), 13 deletions(-) diff --git a/drivers/gpu/drm/vc4/vc4_drv.h b/drivers/gpu/drm/vc4/vc4_drv.h index a5f9f3d8ffed55..894bfe9f2ba83c 100644 --- a/drivers/gpu/drm/vc4/vc4_drv.h +++ b/drivers/gpu/drm/vc4/vc4_drv.h @@ -480,6 +480,11 @@ struct vc4_plane_state { */ bool is_yuv444_unity; + /* Skip lines on large downscales to avoid consuming too much SDRAM + * bandwidth + */ + unsigned int vdownsample; + /* Our allocation in LBM for temporary storage during scaling. 
*/ unsigned int lbm_handle; diff --git a/drivers/gpu/drm/vc4/vc4_plane.c b/drivers/gpu/drm/vc4/vc4_plane.c index d79fe20138acd8..929cac4c8555f0 100644 --- a/drivers/gpu/drm/vc4/vc4_plane.c +++ b/drivers/gpu/drm/vc4/vc4_plane.c @@ -935,7 +935,7 @@ static void vc4_write_scaling_parameters(struct drm_plane_state *state, /* Ch0 V-TPZ Words 0-2: Scaling Parameters, Recip, Context */ if (vc4_state->y_scaling[channel] == VC4_SCALING_TPZ) { - vc4_write_tpz(vc4_state, vc4_state->src_h[channel], + vc4_write_tpz(vc4_state, vc4_state->src_h[channel] / vc4_state->vdownsample, vc4_state->crtc_h); vc4_dlist_write(vc4_state, 0xc0c0c0c0); } @@ -1361,6 +1361,17 @@ static int vc4_plane_mode_set(struct drm_plane *plane, return 0; } + if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ && + vc4_state->src_h[0] / vc4_state->crtc_h > (3 << 16)) { + /* Downscaling by more than x3. Reduce the number of lines read + * to avoid exceeding SDRAM bandwidth. + */ + vc4_state->vdownsample = ((vc4_state->src_h[0] / + (vc4_state->crtc_h * 3)) >> 16) + 1; + } else { + vc4_state->vdownsample = 1; + } + width = vc4_state->src_w[0] >> 16; height = vc4_state->src_h[0] >> 16; @@ -1406,6 +1417,9 @@ static int vc4_plane_mode_set(struct drm_plane *plane, break; case DRM_FORMAT_MOD_BROADCOM_VC4_T_TILED: { + /* Line skipping decimation currently not supported for T-format */ + vc4_state->vdownsample = 1; + u32 tile_size_shift = 12; /* T tiles are 4kb */ /* Whole-tile offsets, mostly for setting the pitch. */ u32 tile_w_shift = fb->format->cpp[0] == 2 ? 6 : 5; @@ -1558,6 +1572,9 @@ static int vc4_plane_mode_set(struct drm_plane *plane, offsets[i] += pitch[i] * tile * tile_width; offsets[i] += src_y / (i ? v_subsample : 1) * tile_width; offsets[i] += x_off & ~(i ? 1 : 0); + + pitch[i] |= VC4_SET_FIELD(vc4_state->vdownsample - 1, + SCALER_TILE_SKIP_0); } break; } @@ -1635,7 +1652,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, (mix_plane_alpha ? 
SCALER_POS2_ALPHA_MIX : 0) | vc4_hvs4_get_alpha_blend_mode(state) | VC4_SET_FIELD(width, SCALER_POS2_WIDTH) | - VC4_SET_FIELD(height, SCALER_POS2_HEIGHT)); + VC4_SET_FIELD(height / vc4_state->vdownsample, + SCALER_POS2_HEIGHT)); /* Position Word 3: Context. Written by the HVS. */ vc4_dlist_write(vc4_state, 0xc0c0c0c0); @@ -1689,7 +1707,8 @@ static int vc4_plane_mode_set(struct drm_plane *plane, vc4_state->pos2_offset = vc4_state->dlist_count; vc4_dlist_write(vc4_state, VC4_SET_FIELD(width, SCALER5_POS2_WIDTH) | - VC4_SET_FIELD(height, SCALER5_POS2_HEIGHT)); + VC4_SET_FIELD(height / vc4_state->vdownsample, + SCALER5_POS2_HEIGHT)); /* Position Word 3: Context. Written by the HVS. */ vc4_dlist_write(vc4_state, 0xc0c0c0c0); @@ -1713,18 +1732,22 @@ static int vc4_plane_mode_set(struct drm_plane *plane, vc4_dlist_write(vc4_state, 0xc0c0c0c0); /* Pitch word 0 */ - vc4_dlist_write(vc4_state, pitch[0]); + if (hvs_format != HVS_PIXEL_FORMAT_H264 && + hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT) + vc4_dlist_write(vc4_state, pitch[0] * vc4_state->vdownsample); + else + vc4_dlist_write(vc4_state, pitch[0]); /* Pitch word 1/2 */ for (i = 1; i < num_planes; i++) { if (hvs_format != HVS_PIXEL_FORMAT_H264 && - hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT) { + hvs_format != HVS_PIXEL_FORMAT_YCBCR_10BIT) vc4_dlist_write(vc4_state, - VC4_SET_FIELD(fb->pitches[i], + VC4_SET_FIELD(fb->pitches[i] / + vc4_state->vdownsample, SCALER_SRC_PITCH)); - } else { - vc4_dlist_write(vc4_state, pitch[1]); - } + else + vc4_dlist_write(vc4_state, pitch[i]); } /* Colorspace conversion words */ @@ -1938,6 +1961,17 @@ static int vc6_plane_mode_set(struct drm_plane *plane, width = vc4_state->src_w[0] >> 16; height = vc4_state->src_h[0] >> 16; + if (vc4_state->y_scaling[0] == VC4_SCALING_TPZ && + vc4_state->src_h[0] / vc4_state->crtc_h > (3 << 16)) { + /* Downscaling by more than x3. Reduce the number of lines read + * to avoid exceeding SDRAM bandwidth. 
+ */ + vc4_state->vdownsample = ((vc4_state->src_h[0] / + (vc4_state->crtc_h * 3)) >> 16) + 1; + } else { + vc4_state->vdownsample = 1; + } + /* SCL1 is used for Cb/Cr scaling of planar formats. For RGB * and 4:4:4, scl1 should be set to scl0 so both channels of * the scaler do the same thing. For YUV, the Y plane needs @@ -2090,8 +2124,11 @@ static int vc6_plane_mode_set(struct drm_plane *plane, * Finished using the pitch as a pitch, so pack it as the * register value. */ - pitch[i] = VC4_SET_FIELD(pitch[i], SCALER6_PTR2_PITCH) | - VC4_SET_FIELD(fetch_count - 1, SCALER6_PTR2_FETCH_COUNT); + pitch[i] = VC4_SET_FIELD(pitch[i], SCALER6_PTR2_TILE_HEIGHT) | + VC4_SET_FIELD(fetch_count - 1, + SCALER6_PTR2_TILE_FETCH_COUNT) | + VC4_SET_FIELD(vc4_state->vdownsample - 1, + SCALER6_PTR2_TILE_LSKIP); } break; @@ -2152,7 +2189,7 @@ static int vc6_plane_mode_set(struct drm_plane *plane, /* Position Word 2: Source Image Size */ vc4_state->pos2_offset = vc4_state->dlist_count; vc4_dlist_write(vc4_state, - VC4_SET_FIELD(height - 1, + VC4_SET_FIELD((height / vc4_state->vdownsample) - 1, SCALER6_POS2_SRC_LINES) | VC4_SET_FIELD(width - 1, SCALER6_POS2_SRC_WIDTH)); @@ -2187,7 +2224,7 @@ static int vc6_plane_mode_set(struct drm_plane *plane, if (base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND128 && base_format_mod != DRM_FORMAT_MOD_BROADCOM_SAND256) { vc4_dlist_write(vc4_state, - VC4_SET_FIELD(fb->pitches[i], + VC4_SET_FIELD((fb->pitches[i] * vc4_state->vdownsample), SCALER6_PTR2_PITCH)); } else { vc4_dlist_write(vc4_state, pitch[i]); diff --git a/drivers/gpu/drm/vc4/vc4_regs.h b/drivers/gpu/drm/vc4/vc4_regs.h index 68d83d27c32ad9..e50544df33aedd 100644 --- a/drivers/gpu/drm/vc4/vc4_regs.h +++ b/drivers/gpu/drm/vc4/vc4_regs.h @@ -1415,4 +1415,8 @@ enum hvs_pixel_format { #define SCALER6_PTR2_PITCH_MASK VC4_MASK(16, 0) #define SCALER6_PTR2_FETCH_COUNT_MASK VC4_MASK(26, 16) +#define SCALER6_PTR2_TILE_LSKIP_MASK VC4_MASK(31, 29) +#define SCALER6_PTR2_TILE_FETCH_COUNT_MASK VC4_MASK(26, 
16) +#define SCALER6_PTR2_TILE_HEIGHT_MASK VC4_MASK(16, 0) + #endif /* VC4_REGS_H */