diff options
author | Dave Airlie <airlied@redhat.com> | 2014-11-20 14:32:32 +1000 |
---|---|---|
committer | Dave Airlie <airlied@redhat.com> | 2014-11-20 14:32:32 +1000 |
commit | cc5ac1ca79b4976ed3a779d7ea157f078207b56b (patch) | |
tree | d377a52ab17c0a7c2403c23aa862f0c0e4432ed7 /drivers/gpu/drm/radeon | |
parent | 8975626ea35adcca561f8a81dedccfbc5dd8ec72 (diff) | |
parent | ecd5c9821c39626fa7c03e9c397586b24cb11b79 (diff) | |
download | linux-cc5ac1ca79b4976ed3a779d7ea157f078207b56b.tar.gz |
Merge branch 'amdkfd-v6' of git://people.freedesktop.org/~gabbayo/linux into drm-next
Merge AMDKFD it seems clean enough.
* 'amdkfd-v6' of git://people.freedesktop.org/~gabbayo/linux: (29 commits)
amdkfd: Implement the Get Version IOCTL
amdkfd: Implement the Get Process Aperture IOCTL
amdkfd: Implement the Get Clock Counters IOCTL
amdkfd: Implement the Set Memory Policy IOCTL
amdkfd: Implement the create/destroy/update queue IOCTLs
amdkfd: Add interrupt handling module
amdkfd: Add device queue manager module
amdkfd: Add process queue manager module
amdkfd: Add packet manager module
amdkfd: Add module parameter of scheduling policy
amdkfd: Add kernel queue module
amdkfd: Add mqd_manager module
amdkfd: Add queue module
amdkfd: Add binding/unbinding calls to amd_iommu driver
amdkfd: Add basic modules to amdkfd
amdkfd: Add topology module to amdkfd
amdkfd: Add amdkfd skeleton driver
amdkfd: Add IOCTL set definitions of amdkfd
Update MAINTAINERS and CREDITS files with amdkfd info
drm/radeon: Add radeon <--> amdkfd interface
...
Diffstat (limited to 'drivers/gpu/drm/radeon')
-rw-r--r-- | drivers/gpu/drm/radeon/Makefile | 1 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/cik.c | 155 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/cik_reg.h | 136 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/cikd.h | 53 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon.h | 10 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_device.c | 32 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_drv.c | 5 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_kfd.c | 563 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_kfd.h | 47 | ||||
-rw-r--r-- | drivers/gpu/drm/radeon/radeon_kms.c | 7 |
10 files changed, 914 insertions, 95 deletions
diff --git a/drivers/gpu/drm/radeon/Makefile b/drivers/gpu/drm/radeon/Makefile index d01b87991422..bad6caa0a727 100644 --- a/drivers/gpu/drm/radeon/Makefile +++ b/drivers/gpu/drm/radeon/Makefile @@ -104,6 +104,7 @@ radeon-y += \ radeon_vce.o \ vce_v1_0.o \ vce_v2_0.o \ + radeon_kfd.o radeon-$(CONFIG_COMPAT) += radeon_ioc32.o radeon-$(CONFIG_VGA_SWITCHEROO) += radeon_atpx_handler.o diff --git a/drivers/gpu/drm/radeon/cik.c b/drivers/gpu/drm/radeon/cik.c index 57a359d34830..bce73b6203ac 100644 --- a/drivers/gpu/drm/radeon/cik.c +++ b/drivers/gpu/drm/radeon/cik.c @@ -32,6 +32,7 @@ #include "cik_blit_shaders.h" #include "radeon_ucode.h" #include "clearstate_ci.h" +#include "radeon_kfd.h" MODULE_FIRMWARE("radeon/BONAIRE_pfp.bin"); MODULE_FIRMWARE("radeon/BONAIRE_me.bin"); @@ -1563,6 +1564,8 @@ static const u32 godavari_golden_registers[] = static void cik_init_golden_registers(struct radeon_device *rdev) { + /* Some of the registers might be dependent on GRBM_GFX_INDEX */ + mutex_lock(&rdev->grbm_idx_mutex); switch (rdev->family) { case CHIP_BONAIRE: radeon_program_register_sequence(rdev, @@ -1637,6 +1640,7 @@ static void cik_init_golden_registers(struct radeon_device *rdev) default: break; } + mutex_unlock(&rdev->grbm_idx_mutex); } /** @@ -3428,6 +3432,7 @@ static void cik_setup_rb(struct radeon_device *rdev, u32 disabled_rbs = 0; u32 enabled_rbs = 0; + mutex_lock(&rdev->grbm_idx_mutex); for (i = 0; i < se_num; i++) { for (j = 0; j < sh_per_se; j++) { cik_select_se_sh(rdev, i, j); @@ -3439,6 +3444,7 @@ static void cik_setup_rb(struct radeon_device *rdev, } } cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); + mutex_unlock(&rdev->grbm_idx_mutex); mask = 1; for (i = 0; i < max_rb_num_per_se * se_num; i++) { @@ -3449,6 +3455,7 @@ static void cik_setup_rb(struct radeon_device *rdev, rdev->config.cik.backend_enable_mask = enabled_rbs; + mutex_lock(&rdev->grbm_idx_mutex); for (i = 0; i < se_num; i++) { cik_select_se_sh(rdev, i, 0xffffffff); data = 0; @@ -3476,6 +3483,7 @@ static void cik_setup_rb(struct radeon_device *rdev, WREG32(PA_SC_RASTER_CONFIG, data); } cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); + mutex_unlock(&rdev->grbm_idx_mutex); } /** @@ -3693,6 +3701,12 @@ static void cik_gpu_init(struct radeon_device *rdev) /* set HW defaults for 3D engine */ WREG32(CP_MEQ_THRESHOLDS, MEQ1_START(0x30) | MEQ2_START(0x60)); + mutex_lock(&rdev->grbm_idx_mutex); + /* + * making sure that the following register writes will be broadcasted + * to all the shaders + */ + cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); WREG32(SX_DEBUG_1, 0x20); WREG32(TA_CNTL_AUX, 0x00010000); @@ -3748,6 +3762,7 @@ static void cik_gpu_init(struct radeon_device *rdev) WREG32(PA_CL_ENHANCE, CLIP_VTX_REORDER_ENA | NUM_CLIP_SEQ(3)); WREG32(PA_SC_ENHANCE, ENABLE_PA_SC_OUT_OF_ORDER); + mutex_unlock(&rdev->grbm_idx_mutex); udelay(50); } @@ -4684,12 +4699,11 @@ static int cik_mec_init(struct radeon_device *rdev) /* * KV: 2 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 64 Queues total * CI/KB: 1 MEC, 4 Pipes/MEC, 8 Queues/Pipe - 32 Queues total + * Nonetheless, we assign only 1 pipe because all other pipes will + * be handled by KFD */ - if (rdev->family == CHIP_KAVERI) - rdev->mec.num_mec = 2; - else - rdev->mec.num_mec = 1; - rdev->mec.num_pipe = 4; + rdev->mec.num_mec = 1; + rdev->mec.num_pipe = 1; rdev->mec.num_queue = rdev->mec.num_mec * rdev->mec.num_pipe * 8; if (rdev->mec.hpd_eop_obj == NULL) { @@ -4831,28 +4845,24 @@ static int cik_cp_compute_resume(struct radeon_device *rdev) /* init the pipes */ mutex_lock(&rdev->srbm_mutex); - for (i = 0; i < (rdev->mec.num_pipe * rdev->mec.num_mec); i++) { - int me = (i < 4) ? 1 : 2; - int pipe = (i < 4) ? i : (i - 4); - eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr + (i * MEC_HPD_SIZE * 2); + eop_gpu_addr = rdev->mec.hpd_eop_gpu_addr; - cik_srbm_select(rdev, me, pipe, 0, 0); + cik_srbm_select(rdev, 0, 0, 0, 0); - /* write the EOP addr */ - WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); - WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); + /* write the EOP addr */ + WREG32(CP_HPD_EOP_BASE_ADDR, eop_gpu_addr >> 8); + WREG32(CP_HPD_EOP_BASE_ADDR_HI, upper_32_bits(eop_gpu_addr) >> 8); - /* set the VMID assigned */ - WREG32(CP_HPD_EOP_VMID, 0); + /* set the VMID assigned */ + WREG32(CP_HPD_EOP_VMID, 0); + + /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ + tmp = RREG32(CP_HPD_EOP_CONTROL); + tmp &= ~EOP_SIZE_MASK; + tmp |= order_base_2(MEC_HPD_SIZE / 8); + WREG32(CP_HPD_EOP_CONTROL, tmp); - /* set the EOP size, register value is 2^(EOP_SIZE+1) dwords */ - tmp = RREG32(CP_HPD_EOP_CONTROL); - tmp &= ~EOP_SIZE_MASK; - tmp |= order_base_2(MEC_HPD_SIZE / 8); - WREG32(CP_HPD_EOP_CONTROL, tmp); - } - cik_srbm_select(rdev, 0, 0, 0, 0); mutex_unlock(&rdev->srbm_mutex); /* init the queues. Just two for now. */ @@ -5906,8 +5916,13 @@ int cik_ib_parse(struct radeon_device *rdev, struct radeon_ib *ib) */ int cik_vm_init(struct radeon_device *rdev) { - /* number of VMs */ - rdev->vm_manager.nvm = 16; + /* + * number of VMs + * VMID 0 is reserved for System + * radeon graphics/compute will use VMIDs 1-7 + * amdkfd will use VMIDs 8-15 + */ + rdev->vm_manager.nvm = RADEON_NUM_OF_VMIDS; /* base offset of vram pages */ if (rdev->flags & RADEON_IS_IGP) { u64 tmp = RREG32(MC_VM_FB_OFFSET); @@ -6068,6 +6083,7 @@ static void cik_wait_for_rlc_serdes(struct radeon_device *rdev) u32 i, j, k; u32 mask; + mutex_lock(&rdev->grbm_idx_mutex); for (i = 0; i < rdev->config.cik.max_shader_engines; i++) { for (j = 0; j < rdev->config.cik.max_sh_per_se; j++) { cik_select_se_sh(rdev, i, j); @@ -6079,6 +6095,7 @@ static void cik_wait_for_rlc_serdes(struct radeon_device *rdev) } } cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); + mutex_unlock(&rdev->grbm_idx_mutex); mask = SE_MASTER_BUSY_MASK | GC_MASTER_BUSY | TC0_MASTER_BUSY | TC1_MASTER_BUSY; for (k = 0; k < rdev->usec_timeout; k++) { @@ -6213,10 +6230,12 @@ static int cik_rlc_resume(struct radeon_device *rdev) WREG32(RLC_LB_CNTR_INIT, 0); WREG32(RLC_LB_CNTR_MAX, 0x00008000); + mutex_lock(&rdev->grbm_idx_mutex); cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); WREG32(RLC_LB_INIT_CU_MASK, 0xffffffff); WREG32(RLC_LB_PARAMS, 0x00600408); WREG32(RLC_LB_CNTL, 0x80000004); + mutex_unlock(&rdev->grbm_idx_mutex); WREG32(RLC_MC_CNTL, 0); WREG32(RLC_UCODE_CNTL, 0); @@ -6283,11 +6302,13 @@ static void cik_enable_cgcg(struct radeon_device *rdev, bool enable) tmp = cik_halt_rlc(rdev); + mutex_lock(&rdev->grbm_idx_mutex); cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); tmp2 = BPM_ADDR_MASK | CGCG_OVERRIDE_0 | CGLS_ENABLE; WREG32(RLC_SERDES_WR_CTRL, tmp2); + mutex_unlock(&rdev->grbm_idx_mutex); cik_update_rlc(rdev, tmp); @@ -6329,11 +6350,13 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable) tmp = cik_halt_rlc(rdev); + mutex_lock(&rdev->grbm_idx_mutex); cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); data = BPM_ADDR_MASK | MGCG_OVERRIDE_0; WREG32(RLC_SERDES_WR_CTRL, data); + mutex_unlock(&rdev->grbm_idx_mutex); cik_update_rlc(rdev, tmp); @@ -6377,11 +6400,13 @@ static void cik_enable_mgcg(struct radeon_device *rdev, bool enable) tmp = cik_halt_rlc(rdev); + mutex_lock(&rdev->grbm_idx_mutex); cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); WREG32(RLC_SERDES_WR_CU_MASTER_MASK, 0xffffffff); WREG32(RLC_SERDES_WR_NONCU_MASTER_MASK, 0xffffffff); data = BPM_ADDR_MASK | MGCG_OVERRIDE_1; WREG32(RLC_SERDES_WR_CTRL, data); + mutex_unlock(&rdev->grbm_idx_mutex); cik_update_rlc(rdev, tmp); } @@ -6810,10 +6835,12 @@ static u32 cik_get_cu_active_bitmap(struct radeon_device *rdev, u32 se, u32 sh) u32 mask = 0, tmp, tmp1; int i; + mutex_lock(&rdev->grbm_idx_mutex); cik_select_se_sh(rdev, se, sh); tmp = RREG32(CC_GC_SHADER_ARRAY_CONFIG); tmp1 = RREG32(GC_USER_SHADER_ARRAY_CONFIG); cik_select_se_sh(rdev, 0xffffffff, 0xffffffff); + mutex_unlock(&rdev->grbm_idx_mutex); tmp &= 0xffff0000; @@ -7297,8 +7324,7 @@ static int cik_irq_init(struct radeon_device *rdev) int cik_irq_set(struct radeon_device *rdev) { u32 cp_int_cntl; - u32 cp_m1p0, cp_m1p1, cp_m1p2, cp_m1p3; - u32 cp_m2p0, cp_m2p1, cp_m2p2, cp_m2p3; + u32 cp_m1p0; u32 crtc1 = 0, crtc2 = 0, crtc3 = 0, crtc4 = 0, crtc5 = 0, crtc6 = 0; u32 hpd1, hpd2, hpd3, hpd4, hpd5, hpd6; u32 grbm_int_cntl = 0; @@ -7332,13 +7358,6 @@ int cik_irq_set(struct radeon_device *rdev) dma_cntl1 = RREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET) & ~TRAP_ENABLE; cp_m1p0 = RREG32(CP_ME1_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; - cp_m1p1 = RREG32(CP_ME1_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; - cp_m1p2 = RREG32(CP_ME1_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; - cp_m1p3 = RREG32(CP_ME1_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; - cp_m2p0 = RREG32(CP_ME2_PIPE0_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; - cp_m2p1 = RREG32(CP_ME2_PIPE1_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; - cp_m2p2 = RREG32(CP_ME2_PIPE2_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; - cp_m2p3 = RREG32(CP_ME2_PIPE3_INT_CNTL) & ~TIME_STAMP_INT_ENABLE; if (rdev->flags & RADEON_IS_IGP) thermal_int = RREG32_SMC(CG_THERMAL_INT_CTRL) & @@ -7360,33 +7379,6 @@ int cik_irq_set(struct radeon_device *rdev) case 0: cp_m1p0 |= TIME_STAMP_INT_ENABLE; break; - case 1: - cp_m1p1 |= TIME_STAMP_INT_ENABLE; - break; - case 2: - cp_m1p2 |= TIME_STAMP_INT_ENABLE; - break; - case 3: - cp_m1p2 |= TIME_STAMP_INT_ENABLE; - break; - default: - DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe); - break; - } - } else if (ring->me == 2) { - switch (ring->pipe) { - case 0: - cp_m2p0 |= TIME_STAMP_INT_ENABLE; - break; - case 1: - cp_m2p1 |= TIME_STAMP_INT_ENABLE; - break; - case 2: - cp_m2p2 |= TIME_STAMP_INT_ENABLE; - break; - case 3: - cp_m2p2 |= TIME_STAMP_INT_ENABLE; - break; default: DRM_DEBUG("si_irq_set: sw int cp1 invalid pipe %d\n", ring->pipe); break; @@ -7403,33 +7395,6 @@ int cik_irq_set(struct radeon_device *rdev) case 0: cp_m1p0 |= TIME_STAMP_INT_ENABLE; break; - case 1: - cp_m1p1 |= TIME_STAMP_INT_ENABLE; - break; - case 2: - cp_m1p2 |= TIME_STAMP_INT_ENABLE; - break; - case 3: - cp_m1p2 |= TIME_STAMP_INT_ENABLE; - break; - default: - DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe); - break; - } - } else if (ring->me == 2) { - switch (ring->pipe) { - case 0: - cp_m2p0 |= TIME_STAMP_INT_ENABLE; - break; - case 1: - cp_m2p1 |= TIME_STAMP_INT_ENABLE; - break; - case 2: - cp_m2p2 |= TIME_STAMP_INT_ENABLE; - break; - case 3: - cp_m2p2 |= TIME_STAMP_INT_ENABLE; - break; default: DRM_DEBUG("si_irq_set: sw int cp2 invalid pipe %d\n", ring->pipe); break; @@ -7518,13 +7483,6 @@ int cik_irq_set(struct radeon_device *rdev) WREG32(SDMA0_CNTL + SDMA1_REGISTER_OFFSET, dma_cntl1); WREG32(CP_ME1_PIPE0_INT_CNTL, cp_m1p0); - WREG32(CP_ME1_PIPE1_INT_CNTL, cp_m1p1); - WREG32(CP_ME1_PIPE2_INT_CNTL, cp_m1p2); - WREG32(CP_ME1_PIPE3_INT_CNTL, cp_m1p3); - WREG32(CP_ME2_PIPE0_INT_CNTL, cp_m2p0); - WREG32(CP_ME2_PIPE1_INT_CNTL, cp_m2p1); - WREG32(CP_ME2_PIPE2_INT_CNTL, cp_m2p2); - WREG32(CP_ME2_PIPE3_INT_CNTL, cp_m2p3); WREG32(GRBM_INT_CNTL, grbm_int_cntl); @@ -7841,6 +7799,10 @@ restart_ih: while (rptr != wptr) { /* wptr/rptr are in bytes! */ ring_index = rptr / 4; + + radeon_kfd_interrupt(rdev, + (const void *) &rdev->ih.ring[ring_index]); + src_id = le32_to_cpu(rdev->ih.ring[ring_index]) & 0xff; src_data = le32_to_cpu(rdev->ih.ring[ring_index + 1]) & 0xfffffff; ring_id = le32_to_cpu(rdev->ih.ring[ring_index + 2]) & 0xff; @@ -8530,6 +8492,10 @@ static int cik_startup(struct radeon_device *rdev) if (r) return r; + r = radeon_kfd_resume(rdev); + if (r) + return r; + return 0; } @@ -8578,6 +8544,7 @@ int cik_resume(struct radeon_device *rdev) */ int cik_suspend(struct radeon_device *rdev) { + radeon_kfd_suspend(rdev); radeon_pm_suspend(rdev); dce6_audio_fini(rdev); radeon_vm_manager_fini(rdev); diff --git a/drivers/gpu/drm/radeon/cik_reg.h b/drivers/gpu/drm/radeon/cik_reg.h index ca1bb6133580..79c45e8a536b 100644 --- a/drivers/gpu/drm/radeon/cik_reg.h +++ b/drivers/gpu/drm/radeon/cik_reg.h @@ -147,4 +147,140 @@ #define CIK_LB_DESKTOP_HEIGHT 0x6b0c +#define CP_HQD_IQ_RPTR 0xC970u +#define AQL_ENABLE (1U << 0) + +#define IDLE (1 << 2) + +struct cik_mqd { + uint32_t header; + uint32_t compute_dispatch_initiator; + uint32_t compute_dim_x; + uint32_t compute_dim_y; + uint32_t compute_dim_z; + uint32_t compute_start_x; + uint32_t compute_start_y; + uint32_t compute_start_z; + uint32_t compute_num_thread_x; + uint32_t compute_num_thread_y; + uint32_t compute_num_thread_z; + uint32_t compute_pipelinestat_enable; + uint32_t compute_perfcount_enable; + uint32_t compute_pgm_lo; + uint32_t compute_pgm_hi; + uint32_t compute_tba_lo; + uint32_t compute_tba_hi; + uint32_t compute_tma_lo; + uint32_t compute_tma_hi; + uint32_t compute_pgm_rsrc1; + uint32_t compute_pgm_rsrc2; + uint32_t compute_vmid; + uint32_t compute_resource_limits; + uint32_t compute_static_thread_mgmt_se0; + uint32_t compute_static_thread_mgmt_se1; + uint32_t compute_tmpring_size; + uint32_t compute_static_thread_mgmt_se2; + uint32_t compute_static_thread_mgmt_se3; + uint32_t compute_restart_x; + uint32_t compute_restart_y; + uint32_t compute_restart_z; + uint32_t compute_thread_trace_enable; + uint32_t compute_misc_reserved; + uint32_t compute_user_data_0; + uint32_t compute_user_data_1; + uint32_t compute_user_data_2; + uint32_t compute_user_data_3; + uint32_t compute_user_data_4; + uint32_t compute_user_data_5; + uint32_t compute_user_data_6; + uint32_t compute_user_data_7; + uint32_t compute_user_data_8; + uint32_t compute_user_data_9; + uint32_t compute_user_data_10; + uint32_t compute_user_data_11; + uint32_t compute_user_data_12; + uint32_t compute_user_data_13; + uint32_t compute_user_data_14; + uint32_t compute_user_data_15; + uint32_t cp_compute_csinvoc_count_lo; + uint32_t cp_compute_csinvoc_count_hi; + uint32_t cp_mqd_base_addr_lo; + uint32_t cp_mqd_base_addr_hi; + uint32_t cp_hqd_active; + uint32_t cp_hqd_vmid; + uint32_t cp_hqd_persistent_state; + uint32_t cp_hqd_pipe_priority; + uint32_t cp_hqd_queue_priority; + uint32_t cp_hqd_quantum; + uint32_t cp_hqd_pq_base_lo; + uint32_t cp_hqd_pq_base_hi; + uint32_t cp_hqd_pq_rptr; + uint32_t cp_hqd_pq_rptr_report_addr_lo; + uint32_t cp_hqd_pq_rptr_report_addr_hi; + uint32_t cp_hqd_pq_wptr_poll_addr_lo; + uint32_t cp_hqd_pq_wptr_poll_addr_hi; + uint32_t cp_hqd_pq_doorbell_control; + uint32_t cp_hqd_pq_wptr; + uint32_t cp_hqd_pq_control; + uint32_t cp_hqd_ib_base_addr_lo; + uint32_t cp_hqd_ib_base_addr_hi; + uint32_t cp_hqd_ib_rptr; + uint32_t cp_hqd_ib_control; + uint32_t cp_hqd_iq_timer; + uint32_t cp_hqd_iq_rptr; + uint32_t cp_hqd_dequeue_request; + uint32_t cp_hqd_dma_offload; + uint32_t cp_hqd_sema_cmd; + uint32_t cp_hqd_msg_type; + uint32_t cp_hqd_atomic0_preop_lo; + uint32_t cp_hqd_atomic0_preop_hi; + uint32_t cp_hqd_atomic1_preop_lo; + uint32_t cp_hqd_atomic1_preop_hi; + uint32_t cp_hqd_hq_status0; + uint32_t cp_hqd_hq_control0; + uint32_t cp_mqd_control; + uint32_t cp_mqd_query_time_lo; + uint32_t cp_mqd_query_time_hi; + uint32_t cp_mqd_connect_start_time_lo; + uint32_t cp_mqd_connect_start_time_hi; + uint32_t cp_mqd_connect_end_time_lo; + uint32_t cp_mqd_connect_end_time_hi; + uint32_t cp_mqd_connect_end_wf_count; + uint32_t cp_mqd_connect_end_pq_rptr; + uint32_t cp_mqd_connect_end_pq_wptr; + uint32_t cp_mqd_connect_end_ib_rptr; + uint32_t reserved_96; + uint32_t reserved_97; + uint32_t reserved_98; + uint32_t reserved_99; + uint32_t iqtimer_pkt_header; + uint32_t iqtimer_pkt_dw0; + uint32_t iqtimer_pkt_dw1; + uint32_t iqtimer_pkt_dw2; + uint32_t iqtimer_pkt_dw3; + uint32_t iqtimer_pkt_dw4; + uint32_t iqtimer_pkt_dw5; + uint32_t iqtimer_pkt_dw6; + uint32_t reserved_108; + uint32_t reserved_109; + uint32_t reserved_110; + uint32_t reserved_111; + uint32_t queue_doorbell_id0; + uint32_t queue_doorbell_id1; + uint32_t queue_doorbell_id2; + uint32_t queue_doorbell_id3; + uint32_t queue_doorbell_id4; + uint32_t queue_doorbell_id5; + uint32_t queue_doorbell_id6; + uint32_t queue_doorbell_id7; + uint32_t queue_doorbell_id8; + uint32_t queue_doorbell_id9; + uint32_t queue_doorbell_id10; + uint32_t queue_doorbell_id11; + uint32_t queue_doorbell_id12; + uint32_t queue_doorbell_id13; + uint32_t queue_doorbell_id14; + uint32_t queue_doorbell_id15; +}; + #endif diff --git a/drivers/gpu/drm/radeon/cikd.h b/drivers/gpu/drm/radeon/cikd.h index 0c6e1b55d968..068cbb019326 100644 --- a/drivers/gpu/drm/radeon/cikd.h +++ b/drivers/gpu/drm/radeon/cikd.h @@ -30,6 +30,8 @@ #define CIK_RB_BITMAP_WIDTH_PER_SH 2 #define HAWAII_RB_BITMAP_WIDTH_PER_SH 4 +#define RADEON_NUM_OF_VMIDS 8 + /* DIDT IND registers */ #define DIDT_SQ_CTRL0 0x0 # define DIDT_CTRL_EN (1 << 0) @@ -1137,6 +1139,9 @@ #define SH_MEM_ALIGNMENT_MODE_UNALIGNED 3 #define DEFAULT_MTYPE(x) ((x) << 4) #define APE1_MTYPE(x) ((x) << 7) +/* valid for both DEFAULT_MTYPE and APE1_MTYPE */ +#define MTYPE_CACHED 0 +#define MTYPE_NONCACHED 3 #define SX_DEBUG_1 0x9060 @@ -1447,6 +1452,16 @@ #define CP_HQD_ACTIVE 0xC91C #define CP_HQD_VMID 0xC920 +#define CP_HQD_PERSISTENT_STATE 0xC924u +#define DEFAULT_CP_HQD_PERSISTENT_STATE (0x33U << 8) + +#define CP_HQD_PIPE_PRIORITY 0xC928u +#define CP_HQD_QUEUE_PRIORITY 0xC92Cu +#define CP_HQD_QUANTUM 0xC930u +#define QUANTUM_EN 1U +#define QUANTUM_SCALE_1MS (1U << 4) +#define QUANTUM_DURATION(x) ((x) << 8) + #define CP_HQD_PQ_BASE 0xC934 #define CP_HQD_PQ_BASE_HI 0xC938 #define CP_HQD_PQ_RPTR 0xC93C @@ -1474,12 +1489,32 @@ #define PRIV_STATE (1 << 30) #define KMD_QUEUE (1 << 31) -#define CP_HQD_DEQUEUE_REQUEST 0xC974 +#define CP_HQD_IB_BASE_ADDR 0xC95Cu +#define CP_HQD_IB_BASE_ADDR_HI 0xC960u +#define CP_HQD_IB_RPTR 0xC964u +#define CP_HQD_IB_CONTROL 0xC968u +#define IB_ATC_EN (1U << 23) +#define DEFAULT_MIN_IB_AVAIL_SIZE (3U << 20) + +#define CP_HQD_DEQUEUE_REQUEST 0xC974 +#define DEQUEUE_REQUEST_DRAIN 1 +#define DEQUEUE_REQUEST_RESET 2 #define CP_MQD_CONTROL 0xC99C #define MQD_VMID(x) ((x) << 0) #define MQD_VMID_MASK (0xf << 0) +#define CP_HQD_SEMA_CMD 0xC97Cu +#define CP_HQD_MSG_TYPE 0xC980u +#define CP_HQD_ATOMIC0_PREOP_LO 0xC984u +#define CP_HQD_ATOMIC0_PREOP_HI 0xC988u +#define CP_HQD_ATOMIC1_PREOP_LO 0xC98Cu +#define CP_HQD_ATOMIC1_PREOP_HI 0xC990u +#define CP_HQD_HQ_SCHEDULER0 0xC994u +#define CP_HQD_HQ_SCHEDULER1 0xC998u + +#define SH_STATIC_MEM_CONFIG 0x9604u + #define DB_RENDER_CONTROL 0x28000 #define PA_SC_RASTER_CONFIG 0x28350 @@ -2069,4 +2104,20 @@ #define VCE_CMD_IB_AUTO 0x00000005 #define VCE_CMD_SEMAPHORE 0x00000006 +#define ATC_VMID0_PASID_MAPPING 0x339Cu +#define ATC_VMID_PASID_MAPPING_UPDATE_STATUS 0x3398u +#define ATC_VMID_PASID_MAPPING_VALID (1U << 31) + +#define ATC_VM_APERTURE0_CNTL 0x3310u +#define ATS_ACCESS_MODE_NEVER 0 +#define ATS_ACCESS_MODE_ALWAYS 1 + +#define ATC_VM_APERTURE0_CNTL2 0x3318u +#define ATC_VM_APERTURE0_HIGH_ADDR 0x3308u +#define ATC_VM_APERTURE0_LOW_ADDR 0x3300u +#define ATC_VM_APERTURE1_CNTL 0x3314u +#define ATC_VM_APERTURE1_CNTL2 0x331Cu +#define ATC_VM_APERTURE1_HIGH_ADDR 0x330Cu +#define ATC_VM_APERTURE1_LOW_ADDR 0x3304u + #endif diff --git a/drivers/gpu/drm/radeon/radeon.h b/drivers/gpu/drm/radeon/radeon.h index 908f349a6362..1f61ff089c9e 100644 --- a/drivers/gpu/drm/radeon/radeon.h +++ b/drivers/gpu/drm/radeon/radeon.h @@ -701,6 +701,10 @@ struct radeon_doorbell { int radeon_doorbell_get(struct radeon_device *rdev, u32 *page); void radeon_doorbell_free(struct radeon_device *rdev, u32 doorbell); +void radeon_doorbell_get_kfd_info(struct radeon_device *rdev, + phys_addr_t *aperture_base, + size_t *aperture_size, + size_t *start_offset); /* * IRQS. @@ -2393,6 +2397,8 @@ struct radeon_device { struct radeon_atcs atcs; /* srbm instance registers */ struct mutex srbm_mutex; + /* GRBM index mutex. Protects concurrents access to GRBM index */ + struct mutex grbm_idx_mutex; /* clock, powergating flags */ u32 cg_flags; u32 pg_flags; @@ -2405,6 +2411,10 @@ struct radeon_device { u64 vram_pin_size; u64 gart_pin_size; + /* amdkfd interface */ + struct kfd_dev *kfd; + struct radeon_sa_manager kfd_bo; + struct mutex mn_lock; DECLARE_HASHTABLE(mn_hash, 7); }; diff --git a/drivers/gpu/drm/radeon/radeon_device.c b/drivers/gpu/drm/radeon/radeon_device.c index ea2676954dde..ae87310fd96e 100644 --- a/drivers/gpu/drm/radeon/radeon_device.c +++ b/drivers/gpu/drm/radeon/radeon_device.c @@ -377,6 +377,37 @@ void radeon_doorbell_free(struct radeon_device *rdev, u32 doorbell) __clear_bit(doorbell, rdev->doorbell.used); } +/** + * radeon_doorbell_get_kfd_info - Report doorbell configuration required to + * setup KFD + * + * @rdev: radeon_device pointer + * @aperture_base: output returning doorbell aperture base physical address + * @aperture_size: output returning doorbell aperture size in bytes + * @start_offset: output returning # of doorbell bytes reserved for radeon. + * + * Radeon and the KFD share the doorbell aperture. Radeon sets it up, + * takes doorbells required for its own rings and reports the setup to KFD. + * Radeon reserved doorbells are at the start of the doorbell aperture. + */ +void radeon_doorbell_get_kfd_info(struct radeon_device *rdev, + phys_addr_t *aperture_base, + size_t *aperture_size, + size_t *start_offset) +{ + /* The first num_doorbells are used by radeon. + * KFD takes whatever's left in the aperture. */ + if (rdev->doorbell.size > rdev->doorbell.num_doorbells * sizeof(u32)) { + *aperture_base = rdev->doorbell.base; + *aperture_size = rdev->doorbell.size; + *start_offset = rdev->doorbell.num_doorbells * sizeof(u32); + } else { + *aperture_base = 0; + *aperture_size = 0; + *start_offset = 0; + } +} + /* * radeon_wb_*() * Writeback is the the method by which the the GPU updates special pages @@ -1272,6 +1303,7 @@ int radeon_device_init(struct radeon_device *rdev, mutex_init(&rdev->pm.mutex); mutex_init(&rdev->gpu_clock_mutex); mutex_init(&rdev->srbm_mutex); + mutex_init(&rdev->grbm_idx_mutex); init_rwsem(&rdev->pm.mclk_lock); init_rwsem(&rdev->exclusive_lock); init_waitqueue_head(&rdev->irq.vblank_queue); diff --git a/drivers/gpu/drm/radeon/radeon_drv.c b/drivers/gpu/drm/radeon/radeon_drv.c index dcffa30ee2db..4f50fb0e3d93 100644 --- a/drivers/gpu/drm/radeon/radeon_drv.c +++ b/drivers/gpu/drm/radeon/radeon_drv.c @@ -41,6 +41,8 @@ #include <drm/drm_gem.h> #include "drm_crtc_helper.h" +#include "radeon_kfd.h" + /* * KMS wrapper. * - 2.0.0 - initial interface @@ -654,12 +656,15 @@ static int __init radeon_init(void) #endif } + radeon_kfd_init(); + /* let modprobe override vga console setting */ return drm_pci_init(driver, pdriver); } static void __exit radeon_exit(void) { + radeon_kfd_fini(); drm_pci_exit(driver, pdriver); radeon_unregister_atpx_handler(); } diff --git a/drivers/gpu/drm/radeon/radeon_kfd.c b/drivers/gpu/drm/radeon/radeon_kfd.c new file mode 100644 index 000000000000..065d02068ec3 --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_kfd.c @@ -0,0 +1,563 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +#include <linux/module.h> +#include <linux/fdtable.h> +#include <linux/uaccess.h> +#include <drm/drmP.h> +#include "radeon.h" +#include "cikd.h" +#include "cik_reg.h" +#include "radeon_kfd.h" + +#define CIK_PIPE_PER_MEC (4) + +struct kgd_mem { + struct radeon_sa_bo *sa_bo; + uint64_t gpu_addr; + void *ptr; +}; + +static int init_sa_manager(struct kgd_dev *kgd, unsigned int size); +static void fini_sa_manager(struct kgd_dev *kgd); + +static int allocate_mem(struct kgd_dev *kgd, size_t size, size_t alignment, + enum kgd_memory_pool pool, struct kgd_mem **mem); + +static void free_mem(struct kgd_dev *kgd, struct kgd_mem *mem); + +static uint64_t get_vmem_size(struct kgd_dev *kgd); +static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd); + +static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd); + +/* + * Register access functions + */ + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, uint32_t sh_mem_ape1_base, + uint32_t sh_mem_ape1_limit, uint32_t sh_mem_bases); + +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid); + +static int kgd_init_memory(struct kgd_dev *kgd); + +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr); + +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr); + +static bool kgd_hqd_is_occupies(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id); + +static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id); + +static const struct kfd2kgd_calls kfd2kgd = { + .init_sa_manager = init_sa_manager, + .fini_sa_manager = fini_sa_manager, + .allocate_mem = allocate_mem, + .free_mem = free_mem, + .get_vmem_size = get_vmem_size, + .get_gpu_clock_counter = get_gpu_clock_counter, + .get_max_engine_clock_in_mhz = get_max_engine_clock_in_mhz, + .program_sh_mem_settings = kgd_program_sh_mem_settings, + .set_pasid_vmid_mapping = kgd_set_pasid_vmid_mapping, + .init_memory = kgd_init_memory, + .init_pipeline = kgd_init_pipeline, + .hqd_load = kgd_hqd_load, + .hqd_is_occupies = kgd_hqd_is_occupies, + .hqd_destroy = kgd_hqd_destroy, +}; + +static const struct kgd2kfd_calls *kgd2kfd; + +bool radeon_kfd_init(void) +{ + bool (*kgd2kfd_init_p)(unsigned, const struct kfd2kgd_calls*, + const struct kgd2kfd_calls**); + + kgd2kfd_init_p = symbol_request(kgd2kfd_init); + + if (kgd2kfd_init_p == NULL) + return false; + + if (!kgd2kfd_init_p(KFD_INTERFACE_VERSION, &kfd2kgd, &kgd2kfd)) { + symbol_put(kgd2kfd_init); + kgd2kfd = NULL; + + return false; + } + + return true; +} + +void radeon_kfd_fini(void) +{ + if (kgd2kfd) { + kgd2kfd->exit(); + symbol_put(kgd2kfd_init); + } +} + +void radeon_kfd_device_probe(struct radeon_device *rdev) +{ + if (kgd2kfd) + rdev->kfd = kgd2kfd->probe((struct kgd_dev *)rdev, rdev->pdev); +} + +void radeon_kfd_device_init(struct radeon_device *rdev) +{ + if (rdev->kfd) { + struct kgd2kfd_shared_resources gpu_resources = { + .compute_vmid_bitmap = 0xFF00, + + .first_compute_pipe = 1, + .compute_pipe_count = 8 - 1, + }; + + radeon_doorbell_get_kfd_info(rdev, + &gpu_resources.doorbell_physical_address, + &gpu_resources.doorbell_aperture_size, + &gpu_resources.doorbell_start_offset); + + kgd2kfd->device_init(rdev->kfd, &gpu_resources); + } +} + +void radeon_kfd_device_fini(struct radeon_device *rdev) +{ + if (rdev->kfd) { + kgd2kfd->device_exit(rdev->kfd); + rdev->kfd = NULL; + } +} + +void radeon_kfd_interrupt(struct radeon_device *rdev, const void *ih_ring_entry) +{ + if (rdev->kfd) + kgd2kfd->interrupt(rdev->kfd, ih_ring_entry); +} + +void radeon_kfd_suspend(struct radeon_device *rdev) +{ + if (rdev->kfd) + kgd2kfd->suspend(rdev->kfd); +} + +int radeon_kfd_resume(struct radeon_device *rdev) +{ + int r = 0; + + if (rdev->kfd) + r = kgd2kfd->resume(rdev->kfd); + + return r; +} + +static u32 pool_to_domain(enum kgd_memory_pool p) +{ + switch (p) { + case KGD_POOL_FRAMEBUFFER: return RADEON_GEM_DOMAIN_VRAM; + default: return RADEON_GEM_DOMAIN_GTT; + } +} + +static int init_sa_manager(struct kgd_dev *kgd, unsigned int size) +{ + struct radeon_device *rdev = (struct radeon_device *)kgd; + int r; + + BUG_ON(kgd == NULL); + + r = radeon_sa_bo_manager_init(rdev, &rdev->kfd_bo, + size, + RADEON_GPU_PAGE_SIZE, + RADEON_GEM_DOMAIN_GTT, + RADEON_GEM_GTT_WC); + + if (r) + return r; + + r = radeon_sa_bo_manager_start(rdev, &rdev->kfd_bo); + if (r) + radeon_sa_bo_manager_fini(rdev, &rdev->kfd_bo); + + return r; +} + +static void fini_sa_manager(struct kgd_dev *kgd) +{ + struct radeon_device *rdev = (struct radeon_device *)kgd; + + BUG_ON(kgd == NULL); + + radeon_sa_bo_manager_suspend(rdev, &rdev->kfd_bo); + radeon_sa_bo_manager_fini(rdev, &rdev->kfd_bo); +} + +static int allocate_mem(struct kgd_dev *kgd, size_t size, size_t alignment, + enum kgd_memory_pool pool, struct kgd_mem **mem) +{ + struct radeon_device *rdev = (struct radeon_device *)kgd; + u32 domain; + int r; + + BUG_ON(kgd == NULL); + + domain = pool_to_domain(pool); + if (domain != RADEON_GEM_DOMAIN_GTT) { + dev_err(rdev->dev, + "Only allowed to allocate gart memory for kfd\n"); + return -EINVAL; + } + + *mem = kmalloc(sizeof(struct kgd_mem), GFP_KERNEL); + if ((*mem) == NULL) + return -ENOMEM; + + r = radeon_sa_bo_new(rdev, &rdev->kfd_bo, &(*mem)->sa_bo, size, + alignment); + if (r) { + dev_err(rdev->dev, "failed to get memory for kfd (%d)\n", r); + return r; + } + + (*mem)->ptr = radeon_sa_bo_cpu_addr((*mem)->sa_bo); + (*mem)->gpu_addr = radeon_sa_bo_gpu_addr((*mem)->sa_bo); + + return 0; +} + +static void free_mem(struct kgd_dev *kgd, struct kgd_mem *mem) +{ + struct radeon_device *rdev = (struct radeon_device *)kgd; + + BUG_ON(kgd == NULL); + + radeon_sa_bo_free(rdev, &mem->sa_bo, NULL); + kfree(mem); +} + +static uint64_t get_vmem_size(struct kgd_dev *kgd) +{ + struct radeon_device *rdev = (struct radeon_device *)kgd; + + BUG_ON(kgd == NULL); + + return rdev->mc.real_vram_size; +} + +static uint64_t get_gpu_clock_counter(struct kgd_dev *kgd) +{ + struct radeon_device *rdev = (struct radeon_device *)kgd; + + return rdev->asic->get_gpu_clock_counter(rdev); +} + +static uint32_t get_max_engine_clock_in_mhz(struct kgd_dev *kgd) +{ + struct radeon_device *rdev = (struct radeon_device *)kgd; + + /* The sclk is in quantas of 10kHz */ + return rdev->pm.dpm.dyn_state.max_clock_voltage_on_ac.sclk / 100; +} + +static inline struct radeon_device *get_radeon_device(struct kgd_dev *kgd) +{ + return (struct radeon_device *)kgd; +} + +static void write_register(struct kgd_dev *kgd, uint32_t offset, uint32_t value) +{ + struct radeon_device *rdev = get_radeon_device(kgd); + + writel(value, (void __iomem *)(rdev->rmmio + offset)); +} + +static uint32_t read_register(struct kgd_dev *kgd, uint32_t offset) +{ + struct radeon_device *rdev = get_radeon_device(kgd); + + return readl((void __iomem *)(rdev->rmmio + offset)); +} + +static void lock_srbm(struct kgd_dev *kgd, uint32_t mec, uint32_t pipe, + uint32_t queue, uint32_t vmid) +{ + struct radeon_device *rdev = get_radeon_device(kgd); + uint32_t value = PIPEID(pipe) | MEID(mec) | VMID(vmid) | QUEUEID(queue); + + mutex_lock(&rdev->srbm_mutex); + write_register(kgd, SRBM_GFX_CNTL, value); +} + +static void unlock_srbm(struct kgd_dev *kgd) +{ + struct radeon_device *rdev = get_radeon_device(kgd); + + write_register(kgd, SRBM_GFX_CNTL, 0); + mutex_unlock(&rdev->srbm_mutex); +} + +static void acquire_queue(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t queue_id) +{ + uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1; + uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC); + + lock_srbm(kgd, mec, pipe, queue_id, 0); +} + +static void release_queue(struct kgd_dev *kgd) +{ + unlock_srbm(kgd); +} + +static void kgd_program_sh_mem_settings(struct kgd_dev *kgd, uint32_t vmid, + uint32_t sh_mem_config, + uint32_t sh_mem_ape1_base, + uint32_t sh_mem_ape1_limit, + uint32_t sh_mem_bases) +{ + lock_srbm(kgd, 0, 0, 0, vmid); + + write_register(kgd, SH_MEM_CONFIG, sh_mem_config); + write_register(kgd, SH_MEM_APE1_BASE, sh_mem_ape1_base); + write_register(kgd, SH_MEM_APE1_LIMIT, sh_mem_ape1_limit); + write_register(kgd, SH_MEM_BASES, sh_mem_bases); + + unlock_srbm(kgd); +} + +static int kgd_set_pasid_vmid_mapping(struct kgd_dev *kgd, unsigned int pasid, + unsigned int vmid) +{ + /* + * We have to assume that there is no outstanding mapping. + * The ATC_VMID_PASID_MAPPING_UPDATE_STATUS bit could be 0 + * because a mapping is in progress or because a mapping finished and + * the SW cleared it. + * So the protocol is to always wait & clear. + */ + uint32_t pasid_mapping = (pasid == 0) ? 0 : + (uint32_t)pasid | ATC_VMID_PASID_MAPPING_VALID; + + write_register(kgd, ATC_VMID0_PASID_MAPPING + vmid*sizeof(uint32_t), + pasid_mapping); + + while (!(read_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS) & + (1U << vmid))) + cpu_relax(); + write_register(kgd, ATC_VMID_PASID_MAPPING_UPDATE_STATUS, 1U << vmid); + + return 0; +} + +static int kgd_init_memory(struct kgd_dev *kgd) +{ + /* + * Configure apertures: + * LDS: 0x60000000'00000000 - 0x60000001'00000000 (4GB) + * Scratch: 0x60000001'00000000 - 0x60000002'00000000 (4GB) + * GPUVM: 0x60010000'00000000 - 0x60020000'00000000 (1TB) + */ + int i; + uint32_t sh_mem_bases = PRIVATE_BASE(0x6000) | SHARED_BASE(0x6000); + + for (i = 8; i < 16; i++) { + uint32_t sh_mem_config; + + lock_srbm(kgd, 0, 0, 0, i); + + sh_mem_config = ALIGNMENT_MODE(SH_MEM_ALIGNMENT_MODE_UNALIGNED); + sh_mem_config |= DEFAULT_MTYPE(MTYPE_NONCACHED); + + write_register(kgd, SH_MEM_CONFIG, sh_mem_config); + + write_register(kgd, SH_MEM_BASES, sh_mem_bases); + + /* Scratch aperture is not supported for now. */ + write_register(kgd, SH_STATIC_MEM_CONFIG, 0); + + /* APE1 disabled for now. */ + write_register(kgd, SH_MEM_APE1_BASE, 1); + write_register(kgd, SH_MEM_APE1_LIMIT, 0); + + unlock_srbm(kgd); + } + + return 0; +} + +static int kgd_init_pipeline(struct kgd_dev *kgd, uint32_t pipe_id, + uint32_t hpd_size, uint64_t hpd_gpu_addr) +{ + uint32_t mec = (++pipe_id / CIK_PIPE_PER_MEC) + 1; + uint32_t pipe = (pipe_id % CIK_PIPE_PER_MEC); + + lock_srbm(kgd, mec, pipe, 0, 0); + write_register(kgd, CP_HPD_EOP_BASE_ADDR, + lower_32_bits(hpd_gpu_addr >> 8)); + write_register(kgd, CP_HPD_EOP_BASE_ADDR_HI, + upper_32_bits(hpd_gpu_addr >> 8)); + write_register(kgd, CP_HPD_EOP_VMID, 0); + write_register(kgd, CP_HPD_EOP_CONTROL, hpd_size); + unlock_srbm(kgd); + + return 0; +} + +static inline struct cik_mqd *get_mqd(void *mqd) +{ + return (struct cik_mqd *)mqd; +} + +static int kgd_hqd_load(struct kgd_dev *kgd, void *mqd, uint32_t pipe_id, + uint32_t queue_id, uint32_t __user *wptr) +{ + uint32_t wptr_shadow, is_wptr_shadow_valid; + struct cik_mqd *m; + + m = get_mqd(mqd); + + is_wptr_shadow_valid = !get_user(wptr_shadow, wptr); + + acquire_queue(kgd, pipe_id, queue_id); + write_register(kgd, CP_MQD_BASE_ADDR, m->cp_mqd_base_addr_lo); + write_register(kgd, CP_MQD_BASE_ADDR_HI, m->cp_mqd_base_addr_hi); + write_register(kgd, CP_MQD_CONTROL, m->cp_mqd_control); + + write_register(kgd, CP_HQD_PQ_BASE, m->cp_hqd_pq_base_lo); + write_register(kgd, CP_HQD_PQ_BASE_HI, m->cp_hqd_pq_base_hi); + write_register(kgd, CP_HQD_PQ_CONTROL, m->cp_hqd_pq_control); + + write_register(kgd, CP_HQD_IB_CONTROL, m->cp_hqd_ib_control); + write_register(kgd, CP_HQD_IB_BASE_ADDR, m->cp_hqd_ib_base_addr_lo); + write_register(kgd, CP_HQD_IB_BASE_ADDR_HI, m->cp_hqd_ib_base_addr_hi); + + write_register(kgd, CP_HQD_IB_RPTR, m->cp_hqd_ib_rptr); + + write_register(kgd, CP_HQD_PERSISTENT_STATE, + m->cp_hqd_persistent_state); + write_register(kgd, CP_HQD_SEMA_CMD, m->cp_hqd_sema_cmd); + write_register(kgd, CP_HQD_MSG_TYPE, m->cp_hqd_msg_type); + + write_register(kgd, CP_HQD_ATOMIC0_PREOP_LO, + m->cp_hqd_atomic0_preop_lo); + + write_register(kgd, CP_HQD_ATOMIC0_PREOP_HI, + m->cp_hqd_atomic0_preop_hi); + + write_register(kgd, CP_HQD_ATOMIC1_PREOP_LO, + m->cp_hqd_atomic1_preop_lo); + + write_register(kgd, CP_HQD_ATOMIC1_PREOP_HI, + m->cp_hqd_atomic1_preop_hi); + + write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR, + m->cp_hqd_pq_rptr_report_addr_lo); + + write_register(kgd, CP_HQD_PQ_RPTR_REPORT_ADDR_HI, + m->cp_hqd_pq_rptr_report_addr_hi); + + write_register(kgd, CP_HQD_PQ_RPTR, m->cp_hqd_pq_rptr); + + write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR, + m->cp_hqd_pq_wptr_poll_addr_lo); + + write_register(kgd, CP_HQD_PQ_WPTR_POLL_ADDR_HI, + m->cp_hqd_pq_wptr_poll_addr_hi); + + write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, + m->cp_hqd_pq_doorbell_control); + + write_register(kgd, CP_HQD_VMID, m->cp_hqd_vmid); + + write_register(kgd, CP_HQD_QUANTUM, m->cp_hqd_quantum); + + write_register(kgd, CP_HQD_PIPE_PRIORITY, m->cp_hqd_pipe_priority); + write_register(kgd, CP_HQD_QUEUE_PRIORITY, m->cp_hqd_queue_priority); + + write_register(kgd, CP_HQD_IQ_RPTR, m->cp_hqd_iq_rptr); + + if (is_wptr_shadow_valid) + write_register(kgd, CP_HQD_PQ_WPTR, wptr_shadow); + + write_register(kgd, CP_HQD_ACTIVE, m->cp_hqd_active); + release_queue(kgd); + + return 0; +} + +static bool kgd_hqd_is_occupies(struct kgd_dev *kgd, uint64_t queue_address, + uint32_t pipe_id, uint32_t queue_id) +{ + uint32_t act; + bool retval = false; + uint32_t low, high; + + acquire_queue(kgd, pipe_id, queue_id); + act = read_register(kgd, CP_HQD_ACTIVE); + if (act) { + low = lower_32_bits(queue_address >> 8); + high = upper_32_bits(queue_address >> 8); + + if (low == read_register(kgd, CP_HQD_PQ_BASE) && + high == read_register(kgd, CP_HQD_PQ_BASE_HI)) + retval = true; + } + release_queue(kgd); + return retval; +} + +static int kgd_hqd_destroy(struct kgd_dev *kgd, uint32_t reset_type, + unsigned int timeout, uint32_t pipe_id, + uint32_t queue_id) +{ + uint32_t temp; + + acquire_queue(kgd, pipe_id, queue_id); + write_register(kgd, CP_HQD_PQ_DOORBELL_CONTROL, 0); + + write_register(kgd, CP_HQD_DEQUEUE_REQUEST, reset_type); + + while (true) { + temp = read_register(kgd, CP_HQD_ACTIVE); + if (temp & 0x1) + break; + if (timeout == 0) { + pr_err("kfd: cp queue preemption time out (%dms)\n", + temp); + return -ETIME; + } + msleep(20); + timeout -= 20; + } + + release_queue(kgd); + return 0; +} diff --git a/drivers/gpu/drm/radeon/radeon_kfd.h b/drivers/gpu/drm/radeon/radeon_kfd.h new file mode 100644 index 000000000000..f90e161ca507 --- /dev/null +++ b/drivers/gpu/drm/radeon/radeon_kfd.h @@ -0,0 +1,47 @@ +/* + * Copyright 2014 Advanced Micro Devices, Inc. + * + * Permission is hereby granted, free of charge, to any person obtaining a + * copy of this software and associated documentation files (the "Software"), + * to deal in the Software without restriction, including without limitation + * the rights to use, copy, modify, merge, publish, distribute, sublicense, + * and/or sell copies of the Software, and to permit persons to whom the + * Software is furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in + * all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL + * THE COPYRIGHT HOLDER(S) OR AUTHOR(S) BE LIABLE FOR ANY CLAIM, DAMAGES OR + * OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, + * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR + * OTHER DEALINGS IN THE SOFTWARE. + */ + +/* + * radeon_kfd.h defines the private interface between the + * AMD kernel graphics drivers and the AMD KFD. + */ + +#ifndef RADEON_KFD_H_INCLUDED +#define RADEON_KFD_H_INCLUDED + +#include <linux/types.h> +#include "../amd/include/kgd_kfd_interface.h" + +struct radeon_device; + +bool radeon_kfd_init(void); +void radeon_kfd_fini(void); + +void radeon_kfd_suspend(struct radeon_device *rdev); +int radeon_kfd_resume(struct radeon_device *rdev); +void radeon_kfd_interrupt(struct radeon_device *rdev, + const void *ih_ring_entry); +void radeon_kfd_device_probe(struct radeon_device *rdev); +void radeon_kfd_device_init(struct radeon_device *rdev); +void radeon_kfd_device_fini(struct radeon_device *rdev); + +#endif /* RADEON_KFD_H_INCLUDED */ diff --git a/drivers/gpu/drm/radeon/radeon_kms.c b/drivers/gpu/drm/radeon/radeon_kms.c index 8309b11e674d..6eb561d33eba 100644 --- a/drivers/gpu/drm/radeon/radeon_kms.c +++ b/drivers/gpu/drm/radeon/radeon_kms.c @@ -34,6 +34,8 @@ #include <linux/slab.h> #include <linux/pm_runtime.h> +#include "radeon_kfd.h" + #if defined(CONFIG_VGA_SWITCHEROO) bool radeon_has_atpx(void); #else @@ -63,6 +65,8 @@ int radeon_driver_unload_kms(struct drm_device *dev) pm_runtime_get_sync(dev->dev); + radeon_kfd_device_fini(rdev); + radeon_acpi_fini(rdev); radeon_modeset_fini(rdev); @@ -142,6 +146,9 @@ int radeon_driver_load_kms(struct drm_device *dev, unsigned long flags) "Error during ACPI methods call\n"); } + radeon_kfd_device_probe(rdev); + radeon_kfd_device_init(rdev); + if (radeon_is_px(dev)) { pm_runtime_use_autosuspend(dev->dev); pm_runtime_set_autosuspend_delay(dev->dev, 5000); |