summaryrefslogtreecommitdiff
path: root/drivers/gpu/drm/radeon/r100_track.h
diff options
context:
space:
mode:
authorDave Airlie <airlied@redhat.com>2009-09-23 16:56:27 +1000
committerDave Airlie <airlied@linux.ie>2009-09-25 13:08:18 +1000
commit513bcb4655e68706594e45dfa1d4b181500110ba (patch)
treeed457db4cfb202015866a131ad4e742503728fad /drivers/gpu/drm/radeon/r100_track.h
parent35e4b7af21d77933abda3d41d1672589eb6c960c (diff)
downloadlinux-513bcb4655e68706594e45dfa1d4b181500110ba.tar.gz
drm/radeon/kms: don't require up to 64k allocations. (v2)
This avoids needing to do a kmalloc > PAGE_SIZE for the main indirect buffer chunk, it adds an accessor for all reads from the chunk and caches a single page at a time for subsequent reads. changes since v1: Use a two page pool which should be the most common case a single packet spanning > PAGE_SIZE will be hit, but I'm having trouble seeing anywhere we currently generate anything like that. hopefully proper short page copying at end added parser_error flag to set deep errors instead of having to test every ib value fetch. fixed bug in patch that went to list. Signed-off-by: Dave Airlie <airlied@redhat.com>
Diffstat (limited to 'drivers/gpu/drm/radeon/r100_track.h')
-rw-r--r--drivers/gpu/drm/radeon/r100_track.h69
1 files changed, 64 insertions, 5 deletions
diff --git a/drivers/gpu/drm/radeon/r100_track.h b/drivers/gpu/drm/radeon/r100_track.h
index 70a82eda394a..0daf0d76a891 100644
--- a/drivers/gpu/drm/radeon/r100_track.h
+++ b/drivers/gpu/drm/radeon/r100_track.h
@@ -84,6 +84,8 @@ int r200_packet0_check(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt,
unsigned idx, unsigned reg);
+
+
static inline int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
struct radeon_cs_packet *pkt,
unsigned idx,
@@ -93,9 +95,7 @@ static inline int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
u32 tile_flags = 0;
u32 tmp;
struct radeon_cs_reloc *reloc;
- struct radeon_cs_chunk *ib_chunk;
-
- ib_chunk = &p->chunks[p->chunk_ib_idx];
+ u32 value;
r = r100_cs_packet_next_reloc(p, &reloc);
if (r) {
@@ -104,7 +104,8 @@ static inline int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
r100_cs_dump_packet(p, pkt);
return r;
}
- tmp = ib_chunk->kdata[idx] & 0x003fffff;
+ value = radeon_get_ib_value(p, idx);
+ tmp = value & 0x003fffff;
tmp += (((u32)reloc->lobj.gpu_offset) >> 10);
if (reloc->lobj.tiling_flags & RADEON_TILING_MACRO)
@@ -119,6 +120,64 @@ static inline int r100_reloc_pitch_offset(struct radeon_cs_parser *p,
}
tmp |= tile_flags;
- p->ib->ptr[idx] = (ib_chunk->kdata[idx] & 0x3fc00000) | tmp;
+ p->ib->ptr[idx] = (value & 0x3fc00000) | tmp;
return 0;
}
+
+static inline int r100_packet3_load_vbpntr(struct radeon_cs_parser *p,
+ struct radeon_cs_packet *pkt,
+ int idx)
+{
+ unsigned c, i;
+ struct radeon_cs_reloc *reloc;
+ struct r100_cs_track *track;
+ int r = 0;
+ volatile uint32_t *ib;
+ u32 idx_value;
+
+ ib = p->ib->ptr;
+ track = (struct r100_cs_track *)p->track;
+ c = radeon_get_ib_value(p, idx++) & 0x1F;
+ track->num_arrays = c;
+ for (i = 0; i < (c - 1); i+=2, idx+=3) {
+ r = r100_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("No reloc for packet3 %d\n",
+ pkt->opcode);
+ r100_cs_dump_packet(p, pkt);
+ return r;
+ }
+ idx_value = radeon_get_ib_value(p, idx);
+ ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
+
+ track->arrays[i + 0].esize = idx_value >> 8;
+ track->arrays[i + 0].robj = reloc->robj;
+ track->arrays[i + 0].esize &= 0x7F;
+ r = r100_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("No reloc for packet3 %d\n",
+ pkt->opcode);
+ r100_cs_dump_packet(p, pkt);
+ return r;
+ }
+ ib[idx+2] = radeon_get_ib_value(p, idx + 2) + ((u32)reloc->lobj.gpu_offset);
+ track->arrays[i + 1].robj = reloc->robj;
+ track->arrays[i + 1].esize = idx_value >> 24;
+ track->arrays[i + 1].esize &= 0x7F;
+ }
+ if (c & 1) {
+ r = r100_cs_packet_next_reloc(p, &reloc);
+ if (r) {
+ DRM_ERROR("No reloc for packet3 %d\n",
+ pkt->opcode);
+ r100_cs_dump_packet(p, pkt);
+ return r;
+ }
+ idx_value = radeon_get_ib_value(p, idx);
+ ib[idx+1] = radeon_get_ib_value(p, idx + 1) + ((u32)reloc->lobj.gpu_offset);
+ track->arrays[i + 0].robj = reloc->robj;
+ track->arrays[i + 0].esize = idx_value >> 8;
+ track->arrays[i + 0].esize &= 0x7F;
+ }
+ return r;
+}