From 7b92c1bd0540b64f54d98331d67e57266f9343c4 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Wed, 28 Jun 2017 13:35:48 +0100 Subject: drm/i915: Avoid keeping waitboost active for signaling threads MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Once a client has requested a waitboost, we keep that waitboost active until all clients are no longer waiting. This is because we don't distinguish which waiter deserves the boost. However, with the advent of fence signaling, the signaler threads appear as waiters to the RPS interrupt handler. So instead of using a single boolean to track when to keep the waitboost active, use a counter of all outstanding waitboosted requests. At this point, I have removed all vestiges of the rate limiting on clients. Whilst this means that compositors should remain more fluid, it also means that boosts are more prevalent. See commit b29c19b64528 ("drm/i915: Boost RPS frequency for CPU stalls") for a longer discussion on the pros and cons of both approaches. A drawback of this implementation is that it requires constant request submission to keep the waitboost trimmed (as it is now cancelled when the request is completed). This will be fine for a busy system, but near idle the boosts may be kept for longer than desired (effectively tens of vblanks worstcase) and there is a reliance on rc6 instead. v2: Remove defunct rps.client_lock Reported-by: Michał Winiarski Signed-off-by: Chris Wilson Cc: Michał Winiarski Reviewed-by: Michał Winiarski Link: http://patchwork.freedesktop.org/patch/msgid/20170628123548.9236-1-chris@chris-wilson.co.uk --- drivers/gpu/drm/i915/intel_pm.c | 57 ++++++++++++++++------------------------- 1 file changed, 22 insertions(+), 35 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 48ea0fca1f72..c3fcadfa0ae7 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -6126,47 +6126,35 @@ void gen6_rps_idle(struct drm_i915_private *dev_priv) gen6_sanitize_rps_pm_mask(dev_priv, ~0)); } mutex_unlock(&dev_priv->rps.hw_lock); - - spin_lock(&dev_priv->rps.client_lock); - while (!list_empty(&dev_priv->rps.clients)) - list_del_init(dev_priv->rps.clients.next); - spin_unlock(&dev_priv->rps.client_lock); } -void gen6_rps_boost(struct drm_i915_private *dev_priv, - struct intel_rps_client *rps, - unsigned long submitted) +void gen6_rps_boost(struct drm_i915_gem_request *rq, + struct intel_rps_client *rps) { + struct drm_i915_private *i915 = rq->i915; + bool boost; + /* This is intentionally racy! We peek at the state here, then * validate inside the RPS worker. */ - if (!(dev_priv->gt.awake && - dev_priv->rps.enabled && - dev_priv->rps.cur_freq < dev_priv->rps.boost_freq)) + if (!i915->rps.enabled) return; - /* Force a RPS boost (and don't count it against the client) if - * the GPU is severely congested. - */ - if (rps && time_after(jiffies, submitted + DRM_I915_THROTTLE_JIFFIES)) - rps = NULL; - - spin_lock(&dev_priv->rps.client_lock); - if (rps == NULL || list_empty(&rps->link)) { - spin_lock_irq(&dev_priv->irq_lock); - if (dev_priv->rps.interrupts_enabled) { - dev_priv->rps.client_boost = true; - schedule_work(&dev_priv->rps.work); - } - spin_unlock_irq(&dev_priv->irq_lock); - - if (rps != NULL) { - list_add(&rps->link, &dev_priv->rps.clients); - rps->boosts++; - } else - dev_priv->rps.boosts++; + boost = false; + spin_lock_irq(&rq->lock); + if (!rq->waitboost && !i915_gem_request_completed(rq)) { + atomic_inc(&i915->rps.num_waiters); + rq->waitboost = true; + boost = true; } - spin_unlock(&dev_priv->rps.client_lock); + spin_unlock_irq(&rq->lock); + if (!boost) + return; + + if (READ_ONCE(i915->rps.cur_freq) < i915->rps.boost_freq) + schedule_work(&i915->rps.work); + + atomic_inc(rps ? &rps->boosts : &i915->rps.boosts); } int intel_set_rps(struct drm_i915_private *dev_priv, u8 val) @@ -9113,7 +9101,7 @@ static void __intel_rps_boost_work(struct work_struct *work) struct drm_i915_gem_request *req = boost->req; if (!i915_gem_request_completed(req)) - gen6_rps_boost(req->i915, NULL, req->emitted_jiffies); + gen6_rps_boost(req, NULL); i915_gem_request_put(req); kfree(boost); @@ -9142,11 +9130,10 @@ void intel_queue_rps_boost_for_request(struct drm_i915_gem_request *req) void intel_pm_setup(struct drm_i915_private *dev_priv) { mutex_init(&dev_priv->rps.hw_lock); - spin_lock_init(&dev_priv->rps.client_lock); INIT_DELAYED_WORK(&dev_priv->rps.autoenable_work, __intel_autoenable_gt_powersave); - INIT_LIST_HEAD(&dev_priv->rps.clients); + atomic_set(&dev_priv->rps.num_waiters, 0); dev_priv->pm.suspended = false; atomic_set(&dev_priv->pm.wakeref_count, 0); -- cgit v1.2.1 From 35ceabf3cdb557b23bbc09f0b6f7bb2b545185b1 Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Thu, 6 Jul 2017 13:41:13 -0700 Subject: drm/i915/cnl: Inherit RPS stuff from previous platforms. Apparently no change on RPS stuff from previous platforms. v2: Merging to rps related patches in one and also adding missed cases. Cc: David Weinehall Signed-off-by: Rodrigo Vivi Reviewed-by: David Weinehall Link: http://patchwork.freedesktop.org/patch/msgid/1499373673-25066-1-git-send-email-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index c3fcadfa0ae7..6db833e6dcbd 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -5852,7 +5852,7 @@ static u32 intel_rps_limits(struct drm_i915_private *dev_priv, u8 val) * the hw runs at the minimal clock before selecting the desired * frequency, if the down threshold expires in that window we will not * receive a down interrupt. */ - if (IS_GEN9(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 9) { limits = (dev_priv->rps.max_freq_softlimit) << 23; if (val <= dev_priv->rps.min_freq_softlimit) limits |= (dev_priv->rps.min_freq_softlimit) << 14; @@ -5994,7 +5994,7 @@ static int gen6_set_rps(struct drm_i915_private *dev_priv, u8 val) if (val != dev_priv->rps.cur_freq) { gen6_set_rps_thresholds(dev_priv, val); - if (IS_GEN9(dev_priv)) + if (INTEL_GEN(dev_priv) >= 9) I915_WRITE(GEN6_RPNSWREQ, GEN9_FREQUENCY(val)); else if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv)) @@ -6353,7 +6353,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) dev_priv->rps.efficient_freq = dev_priv->rps.rp1_freq; if (IS_HASWELL(dev_priv) || IS_BROADWELL(dev_priv) || - IS_GEN9_BC(dev_priv)) { + IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { u32 ddcc_status = 0; if (sandybridge_pcode_read(dev_priv, @@ -6366,7 +6366,7 @@ static void gen6_init_rps_frequencies(struct drm_i915_private *dev_priv) dev_priv->rps.max_freq); } - if (IS_GEN9_BC(dev_priv)) { + if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { /* Store the frequency values in 16.66 MHZ units, which is * the natural hardware unit for SKL */ @@ -6672,7 +6672,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) /* convert DDR frequency from units of 266.6MHz to bandwidth */ min_ring_freq = mult_frac(min_ring_freq, 8, 3); - if (IS_GEN9_BC(dev_priv)) { + if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { /* Convert GT frequency to 50 HZ units */ min_gpu_freq = dev_priv->rps.min_freq / GEN9_FREQ_SCALER; max_gpu_freq = dev_priv->rps.max_freq / GEN9_FREQ_SCALER; @@ -6690,7 +6690,7 @@ static void gen6_update_ring_freq(struct drm_i915_private *dev_priv) int diff = max_gpu_freq - gpu_freq; unsigned int ia_freq = 0, ring_freq = 0; - if (IS_GEN9_BC(dev_priv)) { + if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) { /* * ring_freq = 2 * GT. ring_freq is in 100MHz units * No floor required for ring frequency on SKL. @@ -7821,7 +7821,7 @@ void intel_enable_gt_powersave(struct drm_i915_private *dev_priv) } else if (INTEL_GEN(dev_priv) >= 9) { gen9_enable_rc6(dev_priv); gen9_enable_rps(dev_priv); - if (IS_GEN9_BC(dev_priv)) + if (IS_GEN9_BC(dev_priv) || IS_CANNONLAKE(dev_priv)) gen6_update_ring_freq(dev_priv); } else if (IS_BROADWELL(dev_priv)) { gen8_enable_rps(dev_priv); @@ -9066,7 +9066,7 @@ static int chv_freq_opcode(struct drm_i915_private *dev_priv, int val) int intel_gpu_freq(struct drm_i915_private *dev_priv, int val) { - if (IS_GEN9(dev_priv)) + if (INTEL_GEN(dev_priv) >= 9) return DIV_ROUND_CLOSEST(val * GT_FREQUENCY_MULTIPLIER, GEN9_FREQ_SCALER); else if (IS_CHERRYVIEW(dev_priv)) @@ -9079,7 +9079,7 @@ int intel_gpu_freq(struct drm_i915_private *dev_priv, int val) int intel_freq_opcode(struct drm_i915_private *dev_priv, int val) { - if (IS_GEN9(dev_priv)) + if (INTEL_GEN(dev_priv) >= 9) return DIV_ROUND_CLOSEST(val * GEN9_FREQ_SCALER, GT_FREQUENCY_MULTIPLIER); else if (IS_CHERRYVIEW(dev_priv)) -- cgit v1.2.1 From eed02a7b53131abb796ba8a8cf2886cee366a89f Mon Sep 17 00:00:00 2001 From: "Kumar, Mahesh" Date: Wed, 5 Jul 2017 20:01:45 +0530 Subject: drm/i915: Always perform internal fixed16 division in 64 bits This patch combines fixed_16_16_div & fixed_16_16_div_u64 wrappers. And new fixed_16_16_div wrapper always performs division operation in u64 internally, to avoid any data loss which was happening in earlier version of wrapper. earlier wrapper was converting u32 to fixed16 in 32 bit so we were losing 16-MSB data. Signed-off-by: Mahesh Kumar Reviewed-by: Maarten Lankhorst Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170705143154.32132-3-mahesh1.kumar@intel.com [mlankhorst: Fix typo in commit message.] --- drivers/gpu/drm/i915/intel_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6db833e6dcbd..05eabadaa23d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4276,7 +4276,7 @@ static uint_fixed_16_16_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp, return FP_16_16_MAX; wm_intermediate_val = latency * pixel_rate * cpp; - ret = fixed_16_16_div_u64(wm_intermediate_val, 1000 * 512); + ret = fixed_16_16_div(wm_intermediate_val, 1000 * 512); return ret; } @@ -4314,7 +4314,7 @@ intel_get_linetime_us(struct intel_crtc_state *cstate) return u32_to_fixed_16_16(0); crtc_htotal = cstate->base.adjusted_mode.crtc_htotal; - linetime_us = fixed_16_16_div_u64(crtc_htotal * 1000, pixel_rate); + linetime_us = fixed_16_16_div(crtc_htotal * 1000, pixel_rate); return linetime_us; } -- cgit v1.2.1 From eac2cb81fb87223198c2be93bfd49357d71be669 Mon Sep 17 00:00:00 2001 From: "Kumar, Mahesh" Date: Wed, 5 Jul 2017 20:01:46 +0530 Subject: drm/i915: cleanup fixed-point wrappers naming This patch make naming of fixed-point wrappers consistent operation__<1st operand>_<2nd operand> also shorten the name for fixed_16_16 to fixed16 s/u32_to_fixed_16_16/u32_to_fixed16 s/fixed_16_16_to_u32/fixed16_to_u32 s/fixed_16_16_to_u32_round_up/fixed16_to_u32_round_up s/min_fixed_16_16/min_fixed16 s/max_fixed_16_16/max_fixed16 s/mul_u32_fixed_16_16/mul_u32_fixed16 s/fixed_16_16_div/div_fixed16 Changes Since V1: - Split the patch in more logical patches (Maarten) Changes Since V2: - Rebase Signed-off-by: Mahesh Kumar Reviewed-by: Maarten Lankhorst Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170705143154.32132-4-mahesh1.kumar@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 63 ++++++++++++++++++++--------------------- 1 file changed, 31 insertions(+), 32 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 05eabadaa23d..2603df15b4e1 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3837,7 +3837,7 @@ skl_plane_downscale_amount(const struct intel_crtc_state *cstate, uint_fixed_16_16_t downscale_h, downscale_w; if (WARN_ON(!intel_wm_plane_visible(cstate, pstate))) - return u32_to_fixed_16_16(0); + return u32_to_fixed16(0); /* n.b., src is 16.16 fixed point, dst is whole integer */ if (plane->id == PLANE_CURSOR) { @@ -3861,10 +3861,10 @@ skl_plane_downscale_amount(const struct intel_crtc_state *cstate, dst_h = drm_rect_height(&pstate->base.dst); } - fp_w_ratio = fixed_16_16_div(src_w, dst_w); - fp_h_ratio = fixed_16_16_div(src_h, dst_h); - downscale_w = max_fixed_16_16(fp_w_ratio, u32_to_fixed_16_16(1)); - downscale_h = max_fixed_16_16(fp_h_ratio, u32_to_fixed_16_16(1)); + fp_w_ratio = div_fixed16(src_w, dst_w); + fp_h_ratio = div_fixed16(src_h, dst_h); + downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1)); + downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1)); return mul_fixed16(downscale_w, downscale_h); } @@ -3872,7 +3872,7 @@ skl_plane_downscale_amount(const struct intel_crtc_state *cstate, static uint_fixed_16_16_t skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state) { - uint_fixed_16_16_t pipe_downscale = u32_to_fixed_16_16(1); + uint_fixed_16_16_t pipe_downscale = u32_to_fixed16(1); if (!crtc_state->base.enable) return pipe_downscale; @@ -3891,10 +3891,10 @@ skl_pipe_downscale_amount(const struct intel_crtc_state *crtc_state) if (!dst_w || !dst_h) return pipe_downscale; - fp_w_ratio = fixed_16_16_div(src_w, dst_w); - fp_h_ratio = fixed_16_16_div(src_h, dst_h); - downscale_w = max_fixed_16_16(fp_w_ratio, u32_to_fixed_16_16(1)); - downscale_h = max_fixed_16_16(fp_h_ratio, u32_to_fixed_16_16(1)); + fp_w_ratio = div_fixed16(src_w, dst_w); + fp_h_ratio = div_fixed16(src_h, dst_h); + downscale_w = max_fixed16(fp_w_ratio, u32_to_fixed16(1)); + downscale_h = max_fixed16(fp_h_ratio, u32_to_fixed16(1)); pipe_downscale = mul_fixed16(downscale_w, downscale_h); } @@ -3913,14 +3913,14 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, int crtc_clock, dotclk; uint32_t pipe_max_pixel_rate; uint_fixed_16_16_t pipe_downscale; - uint_fixed_16_16_t max_downscale = u32_to_fixed_16_16(1); + uint_fixed_16_16_t max_downscale = u32_to_fixed16(1); if (!cstate->base.enable) return 0; drm_atomic_crtc_state_for_each_plane_state(plane, pstate, crtc_state) { uint_fixed_16_16_t plane_downscale; - uint_fixed_16_16_t fp_9_div_8 = fixed_16_16_div(9, 8); + uint_fixed_16_16_t fp_9_div_8 = div_fixed16(9, 8); int bpp; if (!intel_wm_plane_visible(cstate, @@ -3938,7 +3938,7 @@ int skl_check_pipe_max_pixel_rate(struct intel_crtc *intel_crtc, plane_downscale = mul_fixed16(plane_downscale, fp_9_div_8); - max_downscale = max_fixed_16_16(plane_downscale, max_downscale); + max_downscale = max_fixed16(plane_downscale, max_downscale); } pipe_downscale = skl_pipe_downscale_amount(cstate); @@ -4276,7 +4276,7 @@ static uint_fixed_16_16_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp, return FP_16_16_MAX; wm_intermediate_val = latency * pixel_rate * cpp; - ret = fixed_16_16_div(wm_intermediate_val, 1000 * 512); + ret = div_fixed16(wm_intermediate_val, 1000 * 512); return ret; } @@ -4294,7 +4294,7 @@ static uint_fixed_16_16_t skl_wm_method2(uint32_t pixel_rate, wm_intermediate_val = latency * pixel_rate; wm_intermediate_val = DIV_ROUND_UP(wm_intermediate_val, pipe_htotal * 1000); - ret = mul_u32_fixed_16_16(wm_intermediate_val, plane_blocks_per_line); + ret = mul_u32_fixed16(wm_intermediate_val, plane_blocks_per_line); return ret; } @@ -4306,15 +4306,15 @@ intel_get_linetime_us(struct intel_crtc_state *cstate) uint_fixed_16_16_t linetime_us; if (!cstate->base.active) - return u32_to_fixed_16_16(0); + return u32_to_fixed16(0); pixel_rate = cstate->pixel_rate; if (WARN_ON(pixel_rate == 0)) - return u32_to_fixed_16_16(0); + return u32_to_fixed16(0); crtc_htotal = cstate->base.adjusted_mode.crtc_htotal; - linetime_us = fixed_16_16_div(crtc_htotal * 1000, pixel_rate); + linetime_us = div_fixed16(crtc_htotal * 1000, pixel_rate); return linetime_us; } @@ -4434,14 +4434,14 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if (y_tiled) { interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line * y_min_scanlines, 512); - plane_blocks_per_line = fixed_16_16_div(interm_pbpl, + plane_blocks_per_line = div_fixed16(interm_pbpl, y_min_scanlines); } else if (x_tiled) { interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512); - plane_blocks_per_line = u32_to_fixed_16_16(interm_pbpl); + plane_blocks_per_line = u32_to_fixed16(interm_pbpl); } else { interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512) + 1; - plane_blocks_per_line = u32_to_fixed_16_16(interm_pbpl); + plane_blocks_per_line = u32_to_fixed16(interm_pbpl); } method1 = skl_wm_method1(plane_pixel_rate, cpp, latency); @@ -4450,35 +4450,35 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, latency, plane_blocks_per_line); - y_tile_minimum = mul_u32_fixed_16_16(y_min_scanlines, - plane_blocks_per_line); + y_tile_minimum = mul_u32_fixed16(y_min_scanlines, + plane_blocks_per_line); if (y_tiled) { - selected_result = max_fixed_16_16(method2, y_tile_minimum); + selected_result = max_fixed16(method2, y_tile_minimum); } else { uint32_t linetime_us; - linetime_us = fixed_16_16_to_u32_round_up( + linetime_us = fixed16_to_u32_round_up( intel_get_linetime_us(cstate)); if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) && (plane_bytes_per_line / 512 < 1)) selected_result = method2; else if ((ddb_allocation && ddb_allocation / - fixed_16_16_to_u32_round_up(plane_blocks_per_line)) >= 1) - selected_result = min_fixed_16_16(method1, method2); + fixed16_to_u32_round_up(plane_blocks_per_line)) >= 1) + selected_result = min_fixed16(method1, method2); else if (latency >= linetime_us) - selected_result = min_fixed_16_16(method1, method2); + selected_result = min_fixed16(method1, method2); else selected_result = method1; } - res_blocks = fixed_16_16_to_u32_round_up(selected_result) + 1; + res_blocks = fixed16_to_u32_round_up(selected_result) + 1; res_lines = div_round_up_fixed16(selected_result, plane_blocks_per_line); if (level >= 1 && level <= 7) { if (y_tiled) { - res_blocks += fixed_16_16_to_u32_round_up(y_tile_minimum); + res_blocks += fixed16_to_u32_round_up(y_tile_minimum); res_lines += y_min_scanlines; } else { res_blocks++; @@ -4563,8 +4563,7 @@ skl_compute_linetime_wm(struct intel_crtc_state *cstate) if (is_fixed16_zero(linetime_us)) return 0; - linetime_wm = fixed_16_16_to_u32_round_up(mul_u32_fixed_16_16(8, - linetime_us)); + linetime_wm = fixed16_to_u32_round_up(mul_u32_fixed16(8, linetime_us)); /* Display WA #1135: bxt. */ if (IS_BROXTON(dev_priv) && dev_priv->ipc_enabled) -- cgit v1.2.1 From 129eaa957dd5a717edd70cfaf0626c143c03e54e Mon Sep 17 00:00:00 2001 From: "Kumar, Mahesh" Date: Wed, 5 Jul 2017 20:01:48 +0530 Subject: drm/i915/skl+: WM calculation don't require height height of plane was require to swap width/height in case of 90/270 rotation. Now src structure contains already swapped values, So we don't have to calculate height of the plane. Signed-off-by: Mahesh Kumar Reviewed-by: Maarten Lankhorst Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170705143154.32132-6-mahesh1.kumar@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 2603df15b4e1..81e77f073d8c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4361,7 +4361,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, uint32_t plane_bytes_per_line; uint32_t res_blocks, res_lines; uint8_t cpp; - uint32_t width = 0, height = 0; + uint32_t width = 0; uint32_t plane_pixel_rate; uint_fixed_16_16_t y_tile_minimum; uint32_t y_min_scanlines; @@ -4390,7 +4390,6 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if (plane->id == PLANE_CURSOR) { width = intel_pstate->base.crtc_w; - height = intel_pstate->base.crtc_h; } else { /* * Src coordinates are already rotated by 270 degrees for @@ -4398,7 +4397,6 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, * GTT mapping), hence no need to account for rotation here. */ width = drm_rect_width(&intel_pstate->base.src) >> 16; - height = drm_rect_height(&intel_pstate->base.src) >> 16; } cpp = fb->format->cpp[0]; -- cgit v1.2.1 From b064be0784530d2a98b589b40793e3d421fb93ba Mon Sep 17 00:00:00 2001 From: "Kumar, Mahesh" Date: Wed, 5 Jul 2017 20:01:49 +0530 Subject: drm/i915/skl+: unify cpp value in WM calculation use same cpp value in different phase of plane WM caluclation. Signed-off-by: Mahesh Kumar Reviewed-by: Maarten Lankhorst Signed-off-by: Maarten Lankhorst Link: http://patchwork.freedesktop.org/patch/msgid/20170705143154.32132-7-mahesh1.kumar@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 81e77f073d8c..ee2a349cfe68 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4399,13 +4399,11 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, width = drm_rect_width(&intel_pstate->base.src) >> 16; } - cpp = fb->format->cpp[0]; + cpp = (fb->format->format == DRM_FORMAT_NV12) ? fb->format->cpp[1] : + fb->format->cpp[0]; plane_pixel_rate = skl_adjusted_plane_pixel_rate(cstate, intel_pstate); if (drm_rotation_90_or_270(pstate->rotation)) { - int cpp = (fb->format->format == DRM_FORMAT_NV12) ? - fb->format->cpp[1] : - fb->format->cpp[0]; switch (cpp) { case 1: -- cgit v1.2.1 From 54d20ed1fff23c7d2633f01fc788111bf9c51c5d Mon Sep 17 00:00:00 2001 From: Maarten Lankhorst Date: Mon, 17 Jul 2017 14:02:30 +0200 Subject: drm/i915: Fix bad comparison in skl_compute_plane_wm, v2. ddb_allocation && ddb_allocation / blocks_per_line >= 1 is the same as ddb_allocation >= blocks_per_line, so use the latter to simplify this. This fixes the following compiler warning: drivers/gpu/drm/i915/intel_pm.c:4467]: (warning) Comparison of a boolean expression with an integer other than 0 or 1. Changes since v1: - Rebase, was missing the changes to the macro names. Signed-off-by: Maarten Lankhorst Fixes: d555cb5827d6 ("drm/i915/skl+: use linetime latency if ddb size is not available") Cc: "Mahesh Kumar" Reported-by: David Binderman Cc: David Binderman Cc: # v4.13-rc1+ Link: http://patchwork.freedesktop.org/patch/msgid/20170717120230.2023-1-maarten.lankhorst@linux.intel.com Reviewed-by: Mahesh Kumar --- drivers/gpu/drm/i915/intel_pm.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index ee2a349cfe68..48785ef75d33 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4459,8 +4459,8 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if ((cpp * cstate->base.adjusted_mode.crtc_htotal / 512 < 1) && (plane_bytes_per_line / 512 < 1)) selected_result = method2; - else if ((ddb_allocation && ddb_allocation / - fixed16_to_u32_round_up(plane_blocks_per_line)) >= 1) + else if (ddb_allocation >= + fixed16_to_u32_round_up(plane_blocks_per_line)) selected_result = min_fixed16(method1, method2); else if (latency >= linetime_us) selected_result = min_fixed16(method1, method2); -- cgit v1.2.1 From 5a9cfff46d193388749f2c4e6ec75f40b47942d2 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Fri, 28 Jul 2017 09:50:22 +0100 Subject: drm/i915: Include mbox details for pcode read/write failures If we fail at punit communication, include both the mbox address and the value we tried to write so that we can identify the invalid sequence. Signed-off-by: Chris Wilson Cc: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/20170728085022.1586-1-chris@chris-wilson.co.uk Reviewed-by: Daniel Vetter --- drivers/gpu/drm/i915/intel_pm.c | 21 +++++++++++++-------- 1 file changed, 13 insertions(+), 8 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 48785ef75d33..8711c1f04079 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -8831,6 +8831,7 @@ static inline int gen6_check_mailbox_status(struct drm_i915_private *dev_priv) case GEN6_PCODE_SUCCESS: return 0; case GEN6_PCODE_UNIMPLEMENTED_CMD: + return -ENODEV; case GEN6_PCODE_ILLEGAL_CMD: return -ENXIO; case GEN6_PCODE_MIN_FREQ_TABLE_GT_RATIO_OUT_OF_RANGE: @@ -8878,7 +8879,8 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val */ if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) { - DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed\n"); + DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps\n", + mbox, __builtin_return_address(0)); return -EAGAIN; } @@ -8889,7 +8891,8 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val if (__intel_wait_for_register_fw(dev_priv, GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, 500, 0, NULL)) { - DRM_ERROR("timeout waiting for pcode read (%d) to finish\n", mbox); + DRM_ERROR("timeout waiting for pcode read (from mbox %x) to finish for %ps\n", + mbox, __builtin_return_address(0)); return -ETIMEDOUT; } @@ -8902,8 +8905,8 @@ int sandybridge_pcode_read(struct drm_i915_private *dev_priv, u32 mbox, u32 *val status = gen6_check_mailbox_status(dev_priv); if (status) { - DRM_DEBUG_DRIVER("warning: pcode (read) mailbox access failed: %d\n", - status); + DRM_DEBUG_DRIVER("warning: pcode (read from mbox %x) mailbox access failed for %ps: %d\n", + mbox, __builtin_return_address(0), status); return status; } @@ -8923,7 +8926,8 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, */ if (I915_READ_FW(GEN6_PCODE_MAILBOX) & GEN6_PCODE_READY) { - DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed\n"); + DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps\n", + val, mbox, __builtin_return_address(0)); return -EAGAIN; } @@ -8934,7 +8938,8 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, if (__intel_wait_for_register_fw(dev_priv, GEN6_PCODE_MAILBOX, GEN6_PCODE_READY, 0, 500, 0, NULL)) { - DRM_ERROR("timeout waiting for pcode write (%d) to finish\n", mbox); + DRM_ERROR("timeout waiting for pcode write of 0x%08x to mbox %x to finish for %ps\n", + val, mbox, __builtin_return_address(0)); return -ETIMEDOUT; } @@ -8946,8 +8951,8 @@ int sandybridge_pcode_write(struct drm_i915_private *dev_priv, status = gen6_check_mailbox_status(dev_priv); if (status) { - DRM_DEBUG_DRIVER("warning: pcode (write) mailbox access failed: %d\n", - status); + DRM_DEBUG_DRIVER("warning: pcode (write of 0x%08x to mbox %x) mailbox access failed for %ps: %d\n", + val, mbox, __builtin_return_address(0), status); return status; } -- cgit v1.2.1 From 32087d1425887e2d51e8c77ff9849d73f6384457 Mon Sep 17 00:00:00 2001 From: Praveen Paneri Date: Thu, 3 Aug 2017 23:02:10 +0530 Subject: drm/i915: enable WaDisableDopClkGating for skl This WA is required when decoupled frequencies for slice and unslice are enabled. This disables DOP clock gating for skl. v2: enable the WA for all gen9 platforms (not just for SKL GT4 where the hang issue is originally reported) to avoid rare hangs (David) v3: as per WaDatabase, enable it only for SKL (Rodrigo) Cc: David Weinehall Reviewed-by: David Weinehall Signed-off-by: Praveen Paneri Signed-off-by: Rodrigo Vivi Link: https://patchwork.freedesktop.org/patch/msgid/1501781530-8186-1-git-send-email-praveen.paneri@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 8711c1f04079..6e393b217450 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -78,6 +78,12 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) /* WaFbcHighMemBwCorruptionAvoidance:skl,bxt,kbl,cfl */ I915_WRITE(ILK_DPFC_CHICKEN, I915_READ(ILK_DPFC_CHICKEN) | ILK_DPFC_DISABLE_DUMMY0); + + if (IS_SKYLAKE(dev_priv)) { + /* WaDisableDopClockGating */ + I915_WRITE(GEN7_MISCCPCTL, I915_READ(GEN7_MISCCPCTL) + & ~GEN7_DOP_CLOCK_GATE_ENABLE); + } } static void bxt_init_clock_gating(struct drm_i915_private *dev_priv) -- cgit v1.2.1 From 2e2adb05736c3101a0b301e39bf5adabb8b5fb22 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ville=20Syrj=C3=A4l=C3=A4?= Date: Tue, 1 Aug 2017 09:58:13 -0700 Subject: drm/i915: Add render decompression support MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit SKL+ display engine can scan out certain kinds of compressed surfaces produced by the render engine. This involved telling the display engine the location of the color control surfae (CCS) which describes which parts of the main surface are compressed and which are not. The location of CCS is provided by userspace as just another plane with its own offset. Add the required stuff to validate the user provided AUX plane metadata and convert the user provided linear offset into something the hardware can consume. Due to hardware limitations we require that the main surface and the AUX surface (CCS) be part of the same bo. The hardware also makes life hard by not allowing you to provide separate x/y offsets for the main and AUX surfaces (excpet with NV12), so finding suitable offsets for both requires a bit of work. Assuming we still want keep playing tricks with the offsets. I've just gone with a dumb "search backward for suitable offsets" approach, which is far from optimal, but it works. Also not all planes will be capable of scanning out compressed surfaces, and eg. 90/270 degree rotation is not supported in combination with decompression either. This patch may contain work from at least the following people: * Vandana Kannan * Daniel Vetter * Ben Widawsky v2: Deal with display workarounds 0390, 0531, 1125 (Paulo) v3: Pretend CCS tiles are regular 128 byte wide Y tiles (Jason) Put the AUX register defines to the correct place Fix up the slightly bogus rotation check v4: Use I915_WRITE_FW() due to plane update locking changes s/return -EINVAL/goto err/ in intel_framebuffer_init() Eliminate a bunch hardcoded numbers in CCS code v5: (By Ben) conflict resolution + - res_blocks += fixed_16_16_to_u32_round_up(y_tile_minimum); + res_blocks += fixed16_to_u32_round_up(y_tile_minimum); v6: (daniels) Fix botched commit message. Cc: Paulo Zanoni Cc: Daniel Vetter Cc: Ben Widawsky Cc: Jason Ekstrand Signed-off-by: Ville Syrjä Reviewed-by: Ben Widawsky (v1) Reviewed-by: Daniel Stone Signed-off-by: Ben Widawsky Signed-off-by: Daniel Stone Link: http://patchwork.freedesktop.org/patch/msgid/20170801165817.7063-1-ben@bwidawsk.net --- drivers/gpu/drm/i915/intel_pm.c | 29 +++++++++++++++++++++++++++-- 1 file changed, 27 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 6e393b217450..4a75b673b85f 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -62,6 +62,20 @@ static void gen9_init_clock_gating(struct drm_i915_private *dev_priv) I915_WRITE(CHICKEN_PAR1_1, I915_READ(CHICKEN_PAR1_1) | SKL_EDP_PSR_FIX_RDWRAP); + /* + * Display WA#0390: skl,bxt,kbl,glk + * + * Must match Sampler, Pixel Back End, and Media + * (0xE194 bit 8, 0x7014 bit 13, 0x4DDC bits 27 and 31). + * + * Including bits outside the page in the hash would + * require 2 (or 4?) MiB alignment of resources. Just + * assume the defaul hashing mode which only uses bits + * within the page. + */ + I915_WRITE(CHICKEN_PAR1_1, + I915_READ(CHICKEN_PAR1_1) & ~SKL_RC_HASH_OUTSIDE); + I915_WRITE(GEN8_CONFIG0, I915_READ(GEN8_CONFIG0) | GEN9_DEFAULT_FIXES); @@ -4077,7 +4091,9 @@ skl_ddb_min_alloc(const struct drm_plane_state *pstate, /* For Non Y-tile return 8-blocks */ if (fb->modifier != I915_FORMAT_MOD_Y_TILED && - fb->modifier != I915_FORMAT_MOD_Yf_TILED) + fb->modifier != I915_FORMAT_MOD_Yf_TILED && + fb->modifier != I915_FORMAT_MOD_Y_TILED_CCS && + fb->modifier != I915_FORMAT_MOD_Yf_TILED_CCS) return 8; /* @@ -4383,7 +4399,9 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, } y_tiled = fb->modifier == I915_FORMAT_MOD_Y_TILED || - fb->modifier == I915_FORMAT_MOD_Yf_TILED; + fb->modifier == I915_FORMAT_MOD_Yf_TILED || + fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS; x_tiled = fb->modifier == I915_FORMAT_MOD_X_TILED; /* Display WA #1141: kbl,cfl */ @@ -4478,6 +4496,13 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, res_lines = div_round_up_fixed16(selected_result, plane_blocks_per_line); + /* Display WA #1125: skl,bxt,kbl,glk */ + if (level == 0 && + (fb->modifier == I915_FORMAT_MOD_Y_TILED_CCS || + fb->modifier == I915_FORMAT_MOD_Yf_TILED_CCS)) + res_blocks += fixed16_to_u32_round_up(y_tile_minimum); + + /* Display WA #1126: skl,bxt,kbl,glk */ if (level >= 1 && level <= 7) { if (y_tiled) { res_blocks += fixed16_to_u32_round_up(y_tile_minimum); -- cgit v1.2.1 From 50682ee63fa3480b0541d0a311239189634b68ab Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Wed, 9 Aug 2017 13:52:43 -0700 Subject: drm/i915/gen10+: use the SKL code for reading WM latencies Gen 10 should use the exact same code as Gen 9, so change the check to take this into consideration, and also assume that future platforms will run this code. Also add a MISSING_CASE(), just in case we do something wrong, instead of silently failing. Cc: Mahesh Kumar Cc: Maarten Lankhorst Signed-off-by: Paulo Zanoni Signed-off-by: Rodrigo Vivi Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20170809205248.11917-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 4a75b673b85f..04697faee4e6 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2778,7 +2778,7 @@ hsw_compute_linetime_wm(const struct intel_crtc_state *cstate) static void intel_read_wm_latency(struct drm_i915_private *dev_priv, uint16_t wm[8]) { - if (IS_GEN9(dev_priv)) { + if (INTEL_GEN(dev_priv) >= 9) { uint32_t val; int ret, i; int level, max_level = ilk_wm_max_level(dev_priv); @@ -2838,7 +2838,7 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, } /* - * WaWmMemoryReadLatency:skl,glk + * WaWmMemoryReadLatency:skl+,glk * * punit doesn't take into account the read latency so we need * to add 2us to the various latency levels we retrieve from the @@ -2877,6 +2877,8 @@ static void intel_read_wm_latency(struct drm_i915_private *dev_priv, wm[0] = 7; wm[1] = (mltr >> MLTR_WM1_SHIFT) & ILK_SRLT_MASK; wm[2] = (mltr >> MLTR_WM2_SHIFT) & ILK_SRLT_MASK; + } else { + MISSING_CASE(INTEL_DEVID(dev_priv)); } } -- cgit v1.2.1 From 019718196c594d2e33cc371cbbcccb84735e6ada Mon Sep 17 00:00:00 2001 From: Rodrigo Vivi Date: Wed, 9 Aug 2017 13:52:44 -0700 Subject: drm/i915/cnl: Enable SAGV for Cannonlake. For now inherit from previous platforms. v2: Rebase on top of CFL. Cc: Mahesh Kumar Cc: Maarten Lankhorst Cc: Paulo Zanoni Signed-off-by: Rodrigo Vivi Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20170809205248.11917-2-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 04697faee4e6..52bf62b6e38c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3571,7 +3571,8 @@ static bool skl_needs_memory_bw_wa(struct intel_atomic_state *state) static bool intel_has_sagv(struct drm_i915_private *dev_priv) { - if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv)) + if (IS_KABYLAKE(dev_priv) || IS_COFFEELAKE(dev_priv) || + IS_CANNONLAKE(dev_priv)) return true; if (IS_SKYLAKE(dev_priv) && -- cgit v1.2.1 From fdd11c2bfce22e57145e861905b2753c0451df85 Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Wed, 9 Aug 2017 13:52:45 -0700 Subject: drm/i915/gen10: fix the gen 10 SAGV block time A previous commit added CNL to intel_has_sagv(), but forgot to adjust the SAGV block time to gen 10 platforms. Cc: Mahesh Kumar Cc: Maarten Lankhorst Signed-off-by: Paulo Zanoni Signed-off-by: Rodrigo Vivi Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20170809205248.11917-3-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 52bf62b6e38c..f64fdae88b7d 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -3552,8 +3552,6 @@ bool ilk_disable_lp_wm(struct drm_device *dev) return _ilk_disable_lp_wm(dev_priv, WM_DIRTY_LP_ALL); } -#define SKL_SAGV_BLOCK_TIME 30 /* µs */ - /* * FIXME: We still don't have the proper code detect if we need to apply the WA, * so assume we'll always need it in order to avoid underruns. @@ -3678,12 +3676,13 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) struct intel_crtc_state *cstate; enum pipe pipe; int level, latency; + int sagv_block_time_us = IS_GEN9(dev_priv) ? 30 : 20; if (!intel_has_sagv(dev_priv)) return false; /* - * SKL workaround: bspec recommends we disable the SAGV when we have + * SKL+ workaround: bspec recommends we disable the SAGV when we have * more then one pipe enabled * * If there are no active CRTCs, no additional checks need be performed @@ -3722,11 +3721,11 @@ bool intel_can_enable_sagv(struct drm_atomic_state *state) latency += 15; /* - * If any of the planes on this pipe don't enable wm levels - * that incur memory latencies higher then 30µs we can't enable - * the SAGV + * If any of the planes on this pipe don't enable wm levels that + * incur memory latencies higher than sagv_block_time_us we + * can't enable the SAGV. */ - if (latency < SKL_SAGV_BLOCK_TIME) + if (latency < sagv_block_time_us) return false; } -- cgit v1.2.1 From dfc267ab5acb2ce73078097875f24985942765af Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Wed, 9 Aug 2017 13:52:46 -0700 Subject: drm/i915/gen10: fix WM latency printing Gen 10 is just like Gen 9, so let's consider that all the future platforms are going to be like gen 9 instead of being like gen8-. Cc: Mahesh Kumar Cc: Maarten Lankhorst Signed-off-by: Paulo Zanoni Signed-off-by: Rodrigo Vivi Reviewed-by: Maarten Lankhorst Link: https://patchwork.freedesktop.org/patch/msgid/20170809205248.11917-4-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index f64fdae88b7d..66495ad36973 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -2934,7 +2934,7 @@ static void intel_print_wm_latency(struct drm_i915_private *dev_priv, * - latencies are in us on gen9. * - before then, WM1+ latency values are in 0.5us units */ - if (IS_GEN9(dev_priv)) + if (INTEL_GEN(dev_priv) >= 9) latency *= 10; else if (level > 0) latency *= 5; -- cgit v1.2.1 From 6c64dd378aca528903cb9f7a60d04fc5c1a3bdbd Mon Sep 17 00:00:00 2001 From: Paulo Zanoni Date: Fri, 11 Aug 2017 16:38:25 -0700 Subject: drm/i915/gen10: implement gen 10 watermarks calculations They're slightly different than the gen 9 calculations. v2: Remove TODO comment. Code matches recent spec. v3: Rebase on top of latest skl code using new fp16.16 and fixing a logic issue. Auto rebase bot has apparently made some bad decisions that changed the logic of the code. (Noticed by Manesh, updated by Rodrigo). Cc: Mahesh Kumar Cc: Maarten Lankhorst Signed-off-by: Paulo Zanoni Signed-off-by: Rodrigo Vivi Reviewed-by: Mahesh Kumar Link: https://patchwork.freedesktop.org/patch/msgid/20170811233825.32083-1-rodrigo.vivi@intel.com --- drivers/gpu/drm/i915/intel_pm.c | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'drivers/gpu/drm/i915/intel_pm.c') diff --git a/drivers/gpu/drm/i915/intel_pm.c b/drivers/gpu/drm/i915/intel_pm.c index 66495ad36973..ed662937ec3c 100644 --- a/drivers/gpu/drm/i915/intel_pm.c +++ b/drivers/gpu/drm/i915/intel_pm.c @@ -4290,8 +4290,9 @@ skl_allocate_pipe_ddb(struct intel_crtc_state *cstate, * should allow pixel_rate up to ~2 GHz which seems sufficient since max * 2xcdclk is 1350 MHz and the pixel rate should never exceed that. */ -static uint_fixed_16_16_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp, - uint32_t latency) +static uint_fixed_16_16_t +skl_wm_method1(const struct drm_i915_private *dev_priv, uint32_t pixel_rate, + uint8_t cpp, uint32_t latency) { uint32_t wm_intermediate_val; uint_fixed_16_16_t ret; @@ -4301,6 +4302,10 @@ static uint_fixed_16_16_t skl_wm_method1(uint32_t pixel_rate, uint8_t cpp, wm_intermediate_val = latency * pixel_rate * cpp; ret = div_fixed16(wm_intermediate_val, 1000 * 512); + + if (INTEL_GEN(dev_priv) >= 10) + ret = add_fixed16_u32(ret, 1); + return ret; } @@ -4456,9 +4461,13 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, if (y_tiled) { interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line * y_min_scanlines, 512); + + if (INTEL_GEN(dev_priv) >= 10) + interm_pbpl++; + plane_blocks_per_line = div_fixed16(interm_pbpl, y_min_scanlines); - } else if (x_tiled) { + } else if (x_tiled && INTEL_GEN(dev_priv) == 9) { interm_pbpl = DIV_ROUND_UP(plane_bytes_per_line, 512); plane_blocks_per_line = u32_to_fixed16(interm_pbpl); } else { @@ -4466,7 +4475,7 @@ static int skl_compute_plane_wm(const struct drm_i915_private *dev_priv, plane_blocks_per_line = u32_to_fixed16(interm_pbpl); } - method1 = skl_wm_method1(plane_pixel_rate, cpp, latency); + method1 = skl_wm_method1(dev_priv, plane_pixel_rate, cpp, latency); method2 = skl_wm_method2(plane_pixel_rate, cstate->base.adjusted_mode.crtc_htotal, latency, -- cgit v1.2.1