diff options
Diffstat (limited to 'cogl/cogl/cogl-journal.c')
-rw-r--r-- | cogl/cogl/cogl-journal.c | 1853 |
1 file changed, 1853 insertions, 0 deletions
diff --git a/cogl/cogl/cogl-journal.c b/cogl/cogl/cogl-journal.c new file mode 100644 index 000000000..8ffe25f97 --- /dev/null +++ b/cogl/cogl/cogl-journal.c @@ -0,0 +1,1853 @@ +/* + * Cogl + * + * A Low Level GPU Graphics and Utilities API + * + * Copyright (C) 2007,2008,2009 Intel Corporation. + * + * Permission is hereby granted, free of charge, to any person + * obtaining a copy of this software and associated documentation + * files (the "Software"), to deal in the Software without + * restriction, including without limitation the rights to use, copy, + * modify, merge, publish, distribute, sublicense, and/or sell copies + * of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be + * included in all copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, + * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF + * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND + * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS + * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN + * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN + * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ * + * + */ + +#ifdef HAVE_CONFIG_H +#include "config.h" +#endif + +#include "cogl-debug.h" +#include "cogl-context-private.h" +#include "cogl-journal-private.h" +#include "cogl-texture-private.h" +#include "cogl-pipeline-private.h" +#include "cogl-pipeline-opengl-private.h" +#include "cogl-vertex-buffer-private.h" +#include "cogl-framebuffer-private.h" +#include "cogl-profile.h" +#include "cogl-attribute-private.h" +#include "cogl-point-in-poly-private.h" +#include "cogl-private.h" +#include "cogl1-context.h" + +#include <string.h> +#include <gmodule.h> +#include <math.h> + +/* XXX NB: + * The data logged in logged_vertices is formatted as follows: + * + * Per entry: + * 4 RGBA GLubytes for the color + * 2 floats for the top left position + * 2 * n_layers floats for the top left texture coordinates + * 2 floats for the bottom right position + * 2 * n_layers floats for the bottom right texture coordinates + */ +#define GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS(N_LAYERS) \ + (N_LAYERS * 2 + 2) + +/* XXX NB: + * Once in the vertex array, the journal's vertex data is arranged as follows: + * 4 vertices per quad: + * 2 or 3 GLfloats per position (3 when doing software transforms) + * 4 RGBA GLubytes, + * 2 GLfloats per tex coord * n_layers + * + * Where n_layers corresponds to the number of pipeline layers enabled + * + * To avoid frequent changes in the stride of our vertex data we always pad + * n_layers to be >= 2 + * + * There will be four vertices per quad in the vertex array + * + * When we are transforming quads in software we need to also track the z + * coordinate of transformed vertices. + * + * So for a given number of layers this gets the stride in 32bit words: + */ +#define SW_TRANSFORM (!(COGL_DEBUG_ENABLED \ + (COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM))) +#define POS_STRIDE (SW_TRANSFORM ? 
3 : 2) /* number of 32bit words */ +#define N_POS_COMPONENTS POS_STRIDE +#define COLOR_STRIDE 1 /* number of 32bit words */ +#define TEX_STRIDE 2 /* number of 32bit words */ +#define MIN_LAYER_PADING 2 +#define GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS(N_LAYERS) \ + (POS_STRIDE + COLOR_STRIDE + \ + TEX_STRIDE * (N_LAYERS < MIN_LAYER_PADING ? MIN_LAYER_PADING : N_LAYERS)) + +/* If a batch is longer than this threshold then we'll assume it's not + worth doing software clipping and it's cheaper to program the GPU + to do the clip */ +#define COGL_JOURNAL_HARDWARE_CLIP_THRESHOLD 8 + +typedef struct _CoglJournalFlushState +{ + CoglContext *ctx; + + CoglJournal *journal; + + CoglAttributeBuffer *attribute_buffer; + GArray *attributes; + int current_attribute; + + size_t stride; + size_t array_offset; + GLuint current_vertex; + + CoglIndices *indices; + size_t indices_type_size; + + CoglPipeline *pipeline; +} CoglJournalFlushState; + +typedef void (*CoglJournalBatchCallback) (CoglJournalEntry *start, + int n_entries, + void *data); +typedef CoglBool (*CoglJournalBatchTest) (CoglJournalEntry *entry0, + CoglJournalEntry *entry1); + +static void _cogl_journal_free (CoglJournal *journal); + +COGL_OBJECT_INTERNAL_DEFINE (Journal, journal); + +static void +_cogl_journal_free (CoglJournal *journal) +{ + int i; + + if (journal->entries) + g_array_free (journal->entries, TRUE); + if (journal->vertices) + g_array_free (journal->vertices, TRUE); + + for (i = 0; i < COGL_JOURNAL_VBO_POOL_SIZE; i++) + if (journal->vbo_pool[i]) + cogl_object_unref (journal->vbo_pool[i]); + + g_slice_free (CoglJournal, journal); +} + +CoglJournal * +_cogl_journal_new (CoglFramebuffer *framebuffer) +{ + CoglJournal *journal = g_slice_new0 (CoglJournal); + + /* The journal keeps a pointer back to the framebuffer because there + is effectively a 1:1 mapping between journals and framebuffers. + However, to avoid a circular reference the journal doesn't take a + reference unless it is non-empty. 
The framebuffer has a special + unref implementation to ensure that the journal is flushed when + the journal is the only thing keeping it alive */ + journal->framebuffer = framebuffer; + + journal->entries = g_array_new (FALSE, FALSE, sizeof (CoglJournalEntry)); + journal->vertices = g_array_new (FALSE, FALSE, sizeof (float)); + + _cogl_list_init (&journal->pending_fences); + + return _cogl_journal_object_new (journal); +} + +static void +_cogl_journal_dump_logged_quad (uint8_t *data, int n_layers) +{ + size_t stride = GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS (n_layers); + int i; + + g_print ("n_layers = %d; rgba=0x%02X%02X%02X%02X\n", + n_layers, data[0], data[1], data[2], data[3]); + + data += 4; + + for (i = 0; i < 2; i++) + { + float *v = (float *)data + (i * stride); + int j; + + g_print ("v%d: x = %f, y = %f", i, v[0], v[1]); + + for (j = 0; j < n_layers; j++) + { + float *t = v + 2 + TEX_STRIDE * j; + g_print (", tx%d = %f, ty%d = %f", j, t[0], j, t[1]); + } + g_print ("\n"); + } +} + +static void +_cogl_journal_dump_quad_vertices (uint8_t *data, int n_layers) +{ + size_t stride = GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS (n_layers); + int i; + + g_print ("n_layers = %d; stride = %d; pos stride = %d; color stride = %d; " + "tex stride = %d; stride in bytes = %d\n", + n_layers, (int)stride, POS_STRIDE, COLOR_STRIDE, + TEX_STRIDE, (int)stride * 4); + + for (i = 0; i < 4; i++) + { + float *v = (float *)data + (i * stride); + uint8_t *c = data + (POS_STRIDE * 4) + (i * stride * 4); + int j; + + if (G_UNLIKELY (COGL_DEBUG_ENABLED + (COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM))) + g_print ("v%d: x = %f, y = %f, rgba=0x%02X%02X%02X%02X", + i, v[0], v[1], c[0], c[1], c[2], c[3]); + else + g_print ("v%d: x = %f, y = %f, z = %f, rgba=0x%02X%02X%02X%02X", + i, v[0], v[1], v[2], c[0], c[1], c[2], c[3]); + for (j = 0; j < n_layers; j++) + { + float *t = v + POS_STRIDE + COLOR_STRIDE + TEX_STRIDE * j; + g_print (", tx%d = %f, ty%d = %f", j, t[0], j, t[1]); + } + g_print ("\n"); + } +} 

/* Debug helper: dump every quad in a batch of expanded vertex data. */
static void
_cogl_journal_dump_quad_batch (uint8_t *data, int n_layers, int n_quads)
{
  size_t byte_stride = GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS (n_layers) * 4;
  int i;

  g_print ("_cogl_journal_dump_quad_batch: n_layers = %d, n_quads = %d\n",
           n_layers, n_quads);
  for (i = 0; i < n_quads; i++)
    /* NOTE(review): this advances by 2 vertices' worth of bytes per quad
     * although each quad occupies 4 vertices in the buffer — looks like it
     * should be `byte_stride * 4 * i`; confirm against the vertex layout
     * (debug-only path, so it only affects dumped output). */
    _cogl_journal_dump_quad_vertices (data + byte_stride * 2 * i, n_layers);
}

/* Walk a run of journal entries, growing a batch while can_batch_callback
 * reports adjacent entries as compatible, and invoke batch_callback once
 * for each maximal batch found (including the final one). */
static void
batch_and_call (CoglJournalEntry *entries,
                int n_entries,
                CoglJournalBatchTest can_batch_callback,
                CoglJournalBatchCallback batch_callback,
                void *data)
{
  int i;
  int batch_len = 1;
  CoglJournalEntry *batch_start = entries;

  if (n_entries < 1)
    return;

  for (i = 1; i < n_entries; i++)
    {
      CoglJournalEntry *entry0 = &entries[i - 1];
      CoglJournalEntry *entry1 = entry0 + 1;

      if (can_batch_callback (entry0, entry1))
        {
          batch_len++;
          continue;
        }

      batch_callback (batch_start, batch_len, data);

      batch_start = entry1;
      batch_len = 1;
    }

  /* The last batch... */
  batch_callback (batch_start, batch_len, data);
}

/* Innermost stage of the flush cascade: all state is now compatible so
 * actually issue the draw calls for this batch of quads, advancing
 * state->current_vertex past the vertices consumed. */
static void
_cogl_journal_flush_modelview_and_entries (CoglJournalEntry *batch_start,
                                           int batch_len,
                                           void *data)
{
  CoglJournalFlushState *state = data;
  CoglContext *ctx = state->ctx;
  CoglFramebuffer *framebuffer = state->journal->framebuffer;
  CoglAttribute **attributes;
  CoglDrawFlags draw_flags = (COGL_DRAW_SKIP_JOURNAL_FLUSH |
                              COGL_DRAW_SKIP_PIPELINE_VALIDATION |
                              COGL_DRAW_SKIP_FRAMEBUFFER_FLUSH |
                              COGL_DRAW_SKIP_LEGACY_STATE);

  COGL_STATIC_TIMER (time_flush_modelview_and_entries,
                     "flush: pipeline+entries", /* parent */
                     "flush: modelview+entries",
                     "The time spent flushing modelview + entries",
                     0 /* no application private data */);

  COGL_TIMER_START (_cogl_uprof_context, time_flush_modelview_and_entries);

  if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_BATCHING)))
    g_print ("BATCHING: modelview batch len = %d\n", batch_len);

  /* Only when software transform is disabled does the GPU need the
   * per-batch modelview; otherwise vertices were pre-transformed */
  if (G_UNLIKELY (COGL_DEBUG_ENABLED
                  (COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM)))
    _cogl_context_set_current_modelview_entry (ctx,
                                               batch_start->modelview_entry);

  attributes = (CoglAttribute **)state->attributes->data;

  if (!_cogl_pipeline_get_real_blend_enabled (state->pipeline))
    draw_flags |= COGL_DRAW_COLOR_ATTRIBUTE_IS_OPAQUE;

#ifdef HAVE_COGL_GL
  if (_cogl_has_private_feature (ctx, COGL_PRIVATE_FEATURE_QUADS))
    {
      /* XXX: it's rather evil that we sneak in the GL_QUADS enum here... */
      _cogl_framebuffer_draw_attributes (framebuffer,
                                         state->pipeline,
                                         GL_QUADS,
                                         state->current_vertex, batch_len * 4,
                                         attributes,
                                         state->attributes->len,
                                         draw_flags);
    }
  else
#endif /* HAVE_COGL_GL */
    {
      if (batch_len > 1)
        {
          CoglVerticesMode mode = COGL_VERTICES_MODE_TRIANGLES;
          /* 6 indices per quad vs 4 vertices per quad */
          int first_vertex = state->current_vertex * 6 / 4;
          _cogl_framebuffer_draw_indexed_attributes (framebuffer,
                                                     state->pipeline,
                                                     mode,
                                                     first_vertex,
                                                     batch_len * 6,
                                                     state->indices,
                                                     attributes,
                                                     state->attributes->len,
                                                     draw_flags);
        }
      else
        {
          _cogl_framebuffer_draw_attributes (framebuffer,
                                             state->pipeline,
                                             COGL_VERTICES_MODE_TRIANGLE_FAN,
                                             state->current_vertex, 4,
                                             attributes,
                                             state->attributes->len,
                                             draw_flags);
        }
    }

  /* DEBUGGING CODE XXX: This path will cause all rectangles to be
   * drawn with a coloured outline. Each batch will be rendered with
   * the same color. This may e.g. help with debugging texture slicing
   * issues, visually seeing what is batched and debugging blending
   * issues, plus it looks quite cool.
   */
  if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_RECTANGLES)))
    {
      static CoglPipeline *outline = NULL;
      uint8_t color_intensity;
      int i;
      CoglAttribute *loop_attributes[1];

      if (outline == NULL)
        outline = cogl_pipeline_new (ctx);

      /* The least significant three bits represent the three
         components so that the order of colours goes red, green,
         yellow, blue, magenta, cyan. Black and white are skipped. The
         next two bits give four scales of intensity for those colours
         in the order 0xff, 0xcc, 0x99, and 0x66. This gives a total
         of 24 colours. If there are more than 24 batches on the stage
         then it will wrap around */
      color_intensity = 0xff - 0x33 * (ctx->journal_rectangles_color >> 3);
      cogl_pipeline_set_color4ub (outline,
                                  (ctx->journal_rectangles_color & 1) ?
                                  color_intensity : 0,
                                  (ctx->journal_rectangles_color & 2) ?
                                  color_intensity : 0,
                                  (ctx->journal_rectangles_color & 4) ?
                                  color_intensity : 0,
                                  0xff);

      loop_attributes[0] = attributes[0]; /* we just want the position */
      for (i = 0; i < batch_len; i++)
        _cogl_framebuffer_draw_attributes (framebuffer,
                                           outline,
                                           COGL_VERTICES_MODE_LINE_LOOP,
                                           4 * i + state->current_vertex, 4,
                                           loop_attributes,
                                           1,
                                           draw_flags);

      /* Go to the next color */
      do
        ctx->journal_rectangles_color = ((ctx->journal_rectangles_color + 1) &
                                         ((1 << 5) - 1));
      /* We don't want to use black or white */
      while ((ctx->journal_rectangles_color & 0x07) == 0
             || (ctx->journal_rectangles_color & 0x07) == 0x07);
    }

  state->current_vertex += (4 * batch_len);

  COGL_TIMER_STOP (_cogl_uprof_context, time_flush_modelview_and_entries);
}

static CoglBool
compare_entry_modelviews (CoglJournalEntry *entry0,
                          CoglJournalEntry *entry1)
{
  /* Batch together quads with the same model view matrix */
  return entry0->modelview_entry == entry1->modelview_entry;
}

/* At this point we have a run of quads that we know have compatible
 * pipelines, but they may not all have the same modelview matrix */
static void
_cogl_journal_flush_pipeline_and_entries (CoglJournalEntry *batch_start,
                                          int batch_len,
                                          void *data)
{
  CoglJournalFlushState *state = data;
  COGL_STATIC_TIMER (time_flush_pipeline_entries,
                     "flush: texcoords+pipeline+entries", /* parent */
                     "flush: pipeline+entries",
                     "The time spent flushing pipeline + entries",
                     0 /* no application private data */);

  COGL_TIMER_START (_cogl_uprof_context, time_flush_pipeline_entries);

  if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_BATCHING)))
    g_print ("BATCHING: pipeline batch len = %d\n", batch_len);

  state->pipeline = batch_start->pipeline;

  /* If we haven't transformed the quads in software then we need to also break
   * up batches according to changes in the modelview matrix... */
  if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM)))
    {
      batch_and_call (batch_start,
                      batch_len,
                      compare_entry_modelviews,
                      _cogl_journal_flush_modelview_and_entries,
                      data);
    }
  else
    _cogl_journal_flush_modelview_and_entries (batch_start, batch_len, data);

  COGL_TIMER_STOP (_cogl_uprof_context, time_flush_pipeline_entries);
}

static CoglBool
compare_entry_pipelines (CoglJournalEntry *entry0, CoglJournalEntry *entry1)
{
  /* batch rectangles using compatible pipelines */

  /* Color differences are tolerated because color is sourced from the
   * per-vertex color attribute, not the pipeline */
  if (_cogl_pipeline_equal (entry0->pipeline,
                            entry1->pipeline,
                            (COGL_PIPELINE_STATE_ALL &
                             ~COGL_PIPELINE_STATE_COLOR),
                            COGL_PIPELINE_LAYER_STATE_ALL,
                            0))
    return TRUE;
  else
    return FALSE;
}

typedef struct _CreateAttributeState
{
  int current;                       /* index of next texcoord attribute */
  CoglJournalFlushState *flush_state;
} CreateAttributeState;

/* cogl_pipeline_foreach_layer callback: creates the texcoord attribute
 * for one pipeline layer, storing it at slot (current + 2) since slots
 * 0 and 1 hold position and color. Always returns TRUE to continue. */
static CoglBool
create_attribute_cb (CoglPipeline *pipeline,
                     int layer_number,
                     void *user_data)
{
  CreateAttributeState *state = user_data;
  CoglJournalFlushState *flush_state = state->flush_state;
  CoglAttribute **attribute_entry =
    &g_array_index (flush_state->attributes,
                    CoglAttribute *,
                    state->current + 2);
  const char *names[] = {
      "cogl_tex_coord0_in",
      "cogl_tex_coord1_in",
      "cogl_tex_coord2_in",
      "cogl_tex_coord3_in",
      "cogl_tex_coord4_in",
      "cogl_tex_coord5_in",
      "cogl_tex_coord6_in",
      "cogl_tex_coord7_in"
  };
  char *name;

  /* XXX NB:
   * Our journal's vertex data is arranged as follows:
   * 4 vertices per quad:
   *    2 or 3 floats per position (3 when doing software transforms)
   *    4 RGBA bytes,
   *    2 floats per tex coord * n_layers
   * (though n_layers may be padded; see definition of
   *  GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS for details)
   */
  name = layer_number < 8 ? (char *)names[layer_number] :
    g_strdup_printf ("cogl_tex_coord%d_in", layer_number);

  /* XXX: it may be worth having some form of static initializer for
   * attributes... */
  *attribute_entry =
    cogl_attribute_new (flush_state->attribute_buffer,
                        name,
                        flush_state->stride,
                        flush_state->array_offset +
                        (POS_STRIDE + COLOR_STRIDE) * 4 +
                        TEX_STRIDE * 4 * state->current,
                        2,
                        COGL_ATTRIBUTE_TYPE_FLOAT);

  /* Only the >= 8 case allocated; the static names need no free */
  if (layer_number >= 8)
    g_free (name);

  state->current++;

  return TRUE;
}

/* Since the stride may not reflect the number of texture layers in use
 * (due to padding) we deal with texture coordinate offsets separately
 * from vertex and color offsets... */
static void
_cogl_journal_flush_texcoord_vbo_offsets_and_entries (
                                          CoglJournalEntry *batch_start,
                                          int batch_len,
                                          void *data)
{
  CoglJournalFlushState *state = data;
  CreateAttributeState create_attrib_state;
  int i;
  COGL_STATIC_TIMER (time_flush_texcoord_pipeline_entries,
                     "flush: vbo+texcoords+pipeline+entries", /* parent */
                     "flush: texcoords+pipeline+entries",
                     "The time spent flushing texcoord offsets + pipeline "
                     "+ entries",
                     0 /* no application private data */);

  COGL_TIMER_START (_cogl_uprof_context, time_flush_texcoord_pipeline_entries);

  /* NB: attributes 0 and 1 are position and color */

  for (i = 2; i < state->attributes->len; i++)
    cogl_object_unref (g_array_index (state->attributes, CoglAttribute *, i));

  g_array_set_size (state->attributes, batch_start->n_layers + 2);

  create_attrib_state.current = 0;
  create_attrib_state.flush_state = state;

  cogl_pipeline_foreach_layer (batch_start->pipeline,
                               create_attribute_cb,
                               &create_attrib_state);

  batch_and_call (batch_start,
                  batch_len,
                  compare_entry_pipelines,
                  _cogl_journal_flush_pipeline_and_entries,
                  data);
  COGL_TIMER_STOP (_cogl_uprof_context, time_flush_texcoord_pipeline_entries);
}

/* Batch test: entries can share texcoord attributes when their pipelines
 * use the same set of layer numbers. */
static CoglBool
compare_entry_layer_numbers (CoglJournalEntry *entry0, CoglJournalEntry *entry1)
{
  if (_cogl_pipeline_layer_numbers_equal (entry0->pipeline, entry1->pipeline))
    return TRUE;
  else
    return FALSE;
}

/* At this point we know the stride has changed from the previous batch
 * of journal entries */
static void
_cogl_journal_flush_vbo_offsets_and_entries (CoglJournalEntry *batch_start,
                                             int batch_len,
                                             void *data)
{
  CoglJournalFlushState *state = data;
  CoglContext *ctx = state->journal->framebuffer->context;
  size_t stride;
  int i;
  CoglAttribute **attribute_entry;
  COGL_STATIC_TIMER (time_flush_vbo_texcoord_pipeline_entries,
                     "flush: clip+vbo+texcoords+pipeline+entries", /* parent */
                     "flush: vbo+texcoords+pipeline+entries",
                     "The time spent flushing vbo + texcoord offsets + "
                     "pipeline + entries",
                     0 /* no application private data */);

  COGL_TIMER_START (_cogl_uprof_context,
                    time_flush_vbo_texcoord_pipeline_entries);

  if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_BATCHING)))
    g_print ("BATCHING: vbo offset batch len = %d\n", batch_len);

  /* XXX NB:
   * Our journal's vertex data is arranged as follows:
   * 4 vertices per quad:
   *    2 or 3 GLfloats per position (3 when doing software transforms)
   *    4 RGBA GLubytes,
   *    2 GLfloats per tex coord * n_layers
   * (though n_layers may be padded; see definition of
   *  GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS for details)
   */
  stride = GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS (batch_start->n_layers);
  stride *= sizeof (float);
  state->stride = stride;

  for (i = 0; i < state->attributes->len; i++)
    cogl_object_unref (g_array_index (state->attributes, CoglAttribute *, i));

  g_array_set_size (state->attributes, 2);

  attribute_entry = &g_array_index (state->attributes, CoglAttribute *, 0);
  *attribute_entry = cogl_attribute_new (state->attribute_buffer,
                                         "cogl_position_in",
                                         stride,
                                         state->array_offset,
                                         N_POS_COMPONENTS,
                                         COGL_ATTRIBUTE_TYPE_FLOAT);

  attribute_entry = &g_array_index (state->attributes, CoglAttribute *, 1);
  *attribute_entry =
    cogl_attribute_new (state->attribute_buffer,
                        "cogl_color_in",
                        stride,
                        state->array_offset + (POS_STRIDE * 4),
                        4,
                        COGL_ATTRIBUTE_TYPE_UNSIGNED_BYTE);

  if (!_cogl_has_private_feature (ctx, COGL_PRIVATE_FEATURE_QUADS))
    state->indices = cogl_get_rectangle_indices (ctx, batch_len);

  /* We only create new Attributes when the stride within the
   * AttributeBuffer changes. (due to a change in the number of pipeline
   * layers) While the stride remains constant we walk forward through
   * the above AttributeBuffer using a vertex offset passed to
   * cogl_draw_attributes
   */
  state->current_vertex = 0;

  if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_JOURNAL)) &&
      cogl_has_feature (ctx, COGL_FEATURE_ID_MAP_BUFFER_FOR_READ))
    {
      uint8_t *verts;

      /* Mapping a buffer for read is probably a really bad thing to
         do but this will only happen during debugging so it probably
         doesn't matter */
      verts = ((uint8_t *)_cogl_buffer_map (COGL_BUFFER (state->attribute_buffer),
                                            COGL_BUFFER_ACCESS_READ, 0,
                                            NULL) +
               state->array_offset);

      _cogl_journal_dump_quad_batch (verts,
                                     batch_start->n_layers,
                                     batch_len);

      cogl_buffer_unmap (COGL_BUFFER (state->attribute_buffer));
    }

  batch_and_call (batch_start,
                  batch_len,
                  compare_entry_layer_numbers,
                  _cogl_journal_flush_texcoord_vbo_offsets_and_entries,
                  data);

  /* progress forward through the VBO containing all our vertices */
  state->array_offset += (stride * 4 * batch_len);
  if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_JOURNAL)))
    g_print ("new vbo offset = %lu\n", (unsigned long)state->array_offset);

  COGL_TIMER_STOP (_cogl_uprof_context,
                   time_flush_vbo_texcoord_pipeline_entries);
}

static CoglBool
compare_entry_strides (CoglJournalEntry *entry0, CoglJournalEntry *entry1)
{
  /* Currently the only thing that affects the stride for our vertex arrays
   * is the number of pipeline layers. We need to update our VBO offsets
   * whenever the stride changes. */
  /* TODO: We should be padding the n_layers == 1 case as if it were
   * n_layers == 2 so we can reduce the need to split batches. */
  if (entry0->n_layers == entry1->n_layers ||
      (entry0->n_layers <= MIN_LAYER_PADING &&
       entry1->n_layers <= MIN_LAYER_PADING))
    return TRUE;
  else
    return FALSE;
}

/* At this point we know the batch has a unique clip stack */
static void
_cogl_journal_flush_clip_stacks_and_entries (CoglJournalEntry *batch_start,
                                             int batch_len,
                                             void *data)
{
  CoglJournalFlushState *state = data;
  CoglFramebuffer *framebuffer = state->journal->framebuffer;
  CoglContext *ctx = framebuffer->context;
  CoglMatrixStack *projection_stack;

  COGL_STATIC_TIMER (time_flush_clip_stack_pipeline_entries,
                     "Journal Flush", /* parent */
                     "flush: clip+vbo+texcoords+pipeline+entries",
                     "The time spent flushing clip + vbo + texcoord offsets + "
                     "pipeline + entries",
                     0 /* no application private data */);

  COGL_TIMER_START (_cogl_uprof_context,
                    time_flush_clip_stack_pipeline_entries);

  if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_BATCHING)))
    g_print ("BATCHING: clip stack batch len = %d\n", batch_len);

  _cogl_clip_stack_flush (batch_start->clip_stack, framebuffer);

  /* XXX: Because we are manually flushing clip state here we need to
   * make sure that the clip state gets updated the next time we flush
   * framebuffer state by marking the current framebuffer's clip state
   * as changed. */
  ctx->current_draw_buffer_changes |= COGL_FRAMEBUFFER_STATE_CLIP;

  /* If we have transformed all our quads at log time then we ensure
   * no further model transform is applied by loading the identity
   * matrix here. We need to do this after flushing the clip stack
   * because the clip stack flushing code can modify the current
   * modelview matrix entry */
  if (G_LIKELY (!(COGL_DEBUG_ENABLED (COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM))))
    _cogl_context_set_current_modelview_entry (ctx, &ctx->identity_entry);

  /* Setting up the clip state can sometimes also update the current
   * projection matrix entry so we should update it again. This will have
   * no affect if the clip code didn't modify the projection */
  projection_stack =
    _cogl_framebuffer_get_projection_stack (framebuffer);
  _cogl_context_set_current_projection_entry (ctx,
                                              projection_stack->last_entry);

  batch_and_call (batch_start,
                  batch_len,
                  compare_entry_strides,
                  _cogl_journal_flush_vbo_offsets_and_entries, /* callback */
                  data);

  COGL_TIMER_STOP (_cogl_uprof_context,
                   time_flush_clip_stack_pipeline_entries);
}

/* Axis-aligned clip rectangle in the journal entry's modelview space */
typedef struct
{
  float x_1, y_1;
  float x_2, y_2;
} ClipBounds;

/* Determine whether @journal_entry's clip stack can be applied by
 * adjusting vertex/texture coordinates instead of programming the GPU
 * clip; on success writes the intersected clip rectangle (in the entry's
 * modelview space) to @clip_bounds_out and returns TRUE. */
static CoglBool
can_software_clip_entry (CoglJournalEntry *journal_entry,
                         CoglJournalEntry *prev_journal_entry,
                         CoglClipStack *clip_stack,
                         ClipBounds *clip_bounds_out)
{
  CoglPipeline *pipeline = journal_entry->pipeline;
  CoglClipStack *clip_entry;
  int layer_num;

  clip_bounds_out->x_1 = -G_MAXFLOAT;
  clip_bounds_out->y_1 = -G_MAXFLOAT;
  clip_bounds_out->x_2 = G_MAXFLOAT;
  clip_bounds_out->y_2 = G_MAXFLOAT;

  /* Check the pipeline is usable. We can short-cut here for
     entries using the same pipeline as the previous entry */
  if (prev_journal_entry == NULL || pipeline != prev_journal_entry->pipeline)
    {
      /* If the pipeline has a user program then we can't reliably modify
         the texture coordinates */
      if (cogl_pipeline_get_user_program (pipeline))
        return FALSE;

      /* If any of the pipeline layers have a texture matrix then we can't
         reliably modify the texture coordinates */
      for (layer_num = cogl_pipeline_get_n_layers (pipeline) - 1;
           layer_num >= 0;
           layer_num--)
        if (_cogl_pipeline_layer_has_user_matrix (pipeline, layer_num))
          return FALSE;
    }

  /* Now we need to verify that each clip entry's matrix is just a
     translation of the journal entry's modelview matrix. We can
     also work out the bounds of the clip in modelview space using
     this translation */
  for (clip_entry = clip_stack; clip_entry; clip_entry = clip_entry->parent)
    {
      float rect_x1, rect_y1, rect_x2, rect_y2;
      CoglClipStackRect *clip_rect;
      float tx, ty, tz;
      CoglMatrixEntry *modelview_entry;

      clip_rect = (CoglClipStackRect *) clip_entry;

      modelview_entry = journal_entry->modelview_entry;
      if (!cogl_matrix_entry_calculate_translation (clip_rect->matrix_entry,
                                                    modelview_entry,
                                                    &tx, &ty, &tz))
        return FALSE;

      /* Normalise the rectangle so x0/y0 is the min corner */
      if (clip_rect->x0 < clip_rect->x1)
        {
          rect_x1 = clip_rect->x0;
          rect_x2 = clip_rect->x1;
        }
      else
        {
          rect_x1 = clip_rect->x1;
          rect_x2 = clip_rect->x0;
        }
      if (clip_rect->y0 < clip_rect->y1)
        {
          rect_y1 = clip_rect->y0;
          rect_y2 = clip_rect->y1;
        }
      else
        {
          rect_y1 = clip_rect->y1;
          rect_y2 = clip_rect->y0;
        }

      /* Intersect with the accumulated bounds */
      clip_bounds_out->x_1 = MAX (clip_bounds_out->x_1, rect_x1 - tx);
      clip_bounds_out->y_1 = MAX (clip_bounds_out->y_1, rect_y1 - ty);
      clip_bounds_out->x_2 = MIN (clip_bounds_out->x_2, rect_x2 - tx);
      clip_bounds_out->y_2 = MIN (clip_bounds_out->y_2, rect_y2 - ty);
    }

  /* An empty intersection clips everything away */
  if (clip_bounds_out->x_2 <= clip_bounds_out->x_1 ||
      clip_bounds_out->y_2 <= clip_bounds_out->y_1)
    memset (clip_bounds_out, 0, sizeof (ClipBounds));

  return TRUE;
}

/* Apply @clip_bounds to one logged quad in place: clamp its corner
 * positions, rescale its texture coordinates to match, and drop the
 * entry's clip stack reference. */
static void
software_clip_entry (CoglJournalEntry *journal_entry,
                     float *verts,
                     ClipBounds *clip_bounds)
{
  size_t stride =
    GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS (journal_entry->n_layers);
  float rx1, ry1, rx2, ry2;
  float vx1, vy1, vx2, vy2;
  int layer_num;

  /* Remove the clip on the entry */
  _cogl_clip_stack_unref (journal_entry->clip_stack);
  journal_entry->clip_stack = NULL;

  vx1 = verts[0];
  vy1 = verts[1];
  vx2 = verts[stride];
  vy2 = verts[stride + 1];

  /* Normalise so (rx1,ry1) is the min corner */
  if (vx1 < vx2)
    {
      rx1 = vx1;
      rx2 = vx2;
    }
  else
    {
      rx1 = vx2;
      rx2 = vx1;
    }
  if (vy1 < vy2)
    {
      ry1 = vy1;
      ry2 = vy2;
    }
  else
    {
      ry1 = vy2;
      ry2 = vy1;
    }

  rx1 = CLAMP (rx1, clip_bounds->x_1, clip_bounds->x_2);
  ry1 = CLAMP (ry1, clip_bounds->y_1, clip_bounds->y_2);
  rx2 = CLAMP (rx2, clip_bounds->x_1, clip_bounds->x_2);
  ry2 = CLAMP (ry2, clip_bounds->y_1, clip_bounds->y_2);

  /* Check if the rectangle intersects the clip at all */
  if (rx1 == rx2 || ry1 == ry2)
    /* Will set all of the vertex data to 0 in the hope that this
       will create a degenerate rectangle and the GL driver will
       be able to clip it quickly */
    memset (verts, 0, sizeof (float) * stride * 2);
  else
    {
      /* Restore the original corner ordering */
      if (vx1 > vx2)
        {
          float t = rx1;
          rx1 = rx2;
          rx2 = t;
        }
      if (vy1 > vy2)
        {
          float t = ry1;
          ry1 = ry2;
          ry2 = t;
        }

      verts[0] = rx1;
      verts[1] = ry1;
      verts[stride] = rx2;
      verts[stride + 1] = ry2;

      /* Convert the rectangle coordinates to a fraction of the original
         rectangle */
      rx1 = (rx1 - vx1) / (vx2 - vx1);
      ry1 = (ry1 - vy1) / (vy2 - vy1);
      rx2 = (rx2 - vx1) / (vx2 - vx1);
      ry2 = (ry2 - vy1) / (vy2 - vy1);

      /* Interpolate each layer's texture coordinates by the same fractions */
      for (layer_num = 0; layer_num < journal_entry->n_layers; layer_num++)
        {
          float *t = verts + 2 + 2 * layer_num;
          float tx1 = t[0], ty1 = t[1];
          float tx2 = t[stride], ty2 = t[stride + 1];
          t[0] = rx1 * (tx2 - tx1) + tx1;
          t[1] = ry1 * (ty2 - ty1) + ty1;
          t[stride] = rx2 * (tx2 - tx1) + tx1;
          t[stride + 1] = ry2 * (ty2 - ty1) + ty1;
        }
    }
}

/* For a short batch sharing one rectangle-only clip stack, try to fold
 * the clip into the logged vertex data so the batch can draw without a
 * GPU clip; bails out (leaving entries untouched) at the first entry
 * that cannot be software clipped. */
static void
maybe_software_clip_entries (CoglJournalEntry *batch_start,
                             int batch_len,
                             CoglJournalFlushState *state)
{
  CoglContext *ctx;
  CoglJournal *journal;
  CoglClipStack *clip_stack, *clip_entry;
  int entry_num;

  /* This tries to find cases where the entry is logged with a clip
     but it would be faster to modify the vertex and texture
     coordinates rather than flush the clip so that it can batch
     better */

  /* If the batch is reasonably long then it's worthwhile programming
     the GPU to do the clip */
  if (batch_len >= COGL_JOURNAL_HARDWARE_CLIP_THRESHOLD)
    return;

  clip_stack = batch_start->clip_stack;

  if (clip_stack == NULL)
    return;

  /* Verify that all of the clip stack entries are a simple rectangle
     clip */
  for (clip_entry = clip_stack; clip_entry; clip_entry = clip_entry->parent)
    if (clip_entry->type != COGL_CLIP_STACK_RECT)
      return;

  ctx = state->ctx;
  journal = state->journal;

  /* This scratch buffer is used to store the translation for each
     entry in the journal. We store it in a separate buffer because
     it's expensive to calculate but at this point we still don't know
     whether we can clip all of the entries so we don't want to do the
     rest of the dependant calculations until we're sure we can. */
  if (ctx->journal_clip_bounds == NULL)
    ctx->journal_clip_bounds = g_array_new (FALSE, FALSE, sizeof (ClipBounds));
  g_array_set_size (ctx->journal_clip_bounds, batch_len);

  for (entry_num = 0; entry_num < batch_len; entry_num++)
    {
      CoglJournalEntry *journal_entry = batch_start + entry_num;
      CoglJournalEntry *prev_journal_entry =
        entry_num ? batch_start + (entry_num - 1) : NULL;
      ClipBounds *clip_bounds = &g_array_index (ctx->journal_clip_bounds,
                                                ClipBounds, entry_num);

      if (!can_software_clip_entry (journal_entry, prev_journal_entry,
                                    clip_stack,
                                    clip_bounds))
        return;
    }

  /* If we make it here then we know we can software clip the entire batch */

  COGL_NOTE (CLIPPING, "Software clipping a batch of length %i", batch_len);

  for (entry_num = 0; entry_num < batch_len; entry_num++)
    {
      CoglJournalEntry *journal_entry = batch_start + entry_num;
      /* +1 skips the color word at the start of the logged entry */
      float *verts = &g_array_index (journal->vertices, float,
                                     journal_entry->array_offset + 1);
      ClipBounds *clip_bounds = &g_array_index (ctx->journal_clip_bounds,
                                                ClipBounds, entry_num);

      software_clip_entry (journal_entry, verts, clip_bounds);
    }

  return;
}

/* Timed wrapper around maybe_software_clip_entries, used as a
 * batch_and_call callback. */
static void
_cogl_journal_maybe_software_clip_entries (CoglJournalEntry *batch_start,
                                           int batch_len,
                                           void *data)
{
  CoglJournalFlushState *state = data;

  COGL_STATIC_TIMER (time_check_software_clip,
                     "Journal Flush", /* parent */
                     "flush: software clipping",
                     "Time spent software clipping",
                     0 /* no application private data */);

  COGL_TIMER_START (_cogl_uprof_context,
                    time_check_software_clip);

  maybe_software_clip_entries (batch_start, batch_len, state);

  COGL_TIMER_STOP (_cogl_uprof_context,
                   time_check_software_clip);
}

/* Batch test: entries can be flushed together when they reference the
 * exact same clip stack object. */
static CoglBool
compare_entry_clip_stacks (CoglJournalEntry *entry0, CoglJournalEntry *entry1)
{
  return entry0->clip_stack == entry1->clip_stack;
}

/* Gets a new vertex array from the pool.
A reference is taken on the + array so it can be treated as if it was just newly allocated */ +static CoglAttributeBuffer * +create_attribute_buffer (CoglJournal *journal, + size_t n_bytes) +{ + CoglAttributeBuffer *vbo; + CoglContext *ctx = journal->framebuffer->context; + + /* If CoglBuffers are being emulated with malloc then there's not + really any point in using the pool so we'll just allocate the + buffer directly */ + if (!_cogl_has_private_feature (ctx, COGL_PRIVATE_FEATURE_VBOS)) + return cogl_attribute_buffer_new_with_size (ctx, n_bytes); + + vbo = journal->vbo_pool[journal->next_vbo_in_pool]; + + if (vbo == NULL) + { + vbo = cogl_attribute_buffer_new_with_size (ctx, n_bytes); + journal->vbo_pool[journal->next_vbo_in_pool] = vbo; + } + else if (cogl_buffer_get_size (COGL_BUFFER (vbo)) < n_bytes) + { + /* If the buffer is too small then we'll just recreate it */ + cogl_object_unref (vbo); + vbo = cogl_attribute_buffer_new_with_size (ctx, n_bytes); + journal->vbo_pool[journal->next_vbo_in_pool] = vbo; + } + + journal->next_vbo_in_pool = ((journal->next_vbo_in_pool + 1) % + COGL_JOURNAL_VBO_POOL_SIZE); + + return cogl_object_ref (vbo); +} + +static CoglAttributeBuffer * +upload_vertices (CoglJournal *journal, + const CoglJournalEntry *entries, + int n_entries, + size_t needed_vbo_len, + GArray *vertices) +{ + CoglAttributeBuffer *attribute_buffer; + CoglBuffer *buffer; + const float *vin; + float *vout; + int entry_num; + int i; + CoglMatrixEntry *last_modelview_entry = NULL; + CoglMatrix modelview; + + g_assert (needed_vbo_len); + + attribute_buffer = create_attribute_buffer (journal, needed_vbo_len * 4); + buffer = COGL_BUFFER (attribute_buffer); + cogl_buffer_set_update_hint (buffer, COGL_BUFFER_UPDATE_HINT_STATIC); + + vout = _cogl_buffer_map_range_for_fill_or_fallback (buffer, + 0, /* offset */ + needed_vbo_len * 4); + vin = &g_array_index (vertices, float, 0); + + /* Expand the number of vertices from 2 to 4 while uploading */ + for (entry_num = 0; 
entry_num < n_entries; entry_num++) + { + const CoglJournalEntry *entry = entries + entry_num; + size_t vb_stride = GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS (entry->n_layers); + size_t array_stride = + GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS (entry->n_layers); + + /* Copy the color to all four of the vertices */ + for (i = 0; i < 4; i++) + memcpy (vout + vb_stride * i + POS_STRIDE, vin, 4); + vin++; + + if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_DISABLE_SOFTWARE_TRANSFORM))) + { + vout[vb_stride * 0] = vin[0]; + vout[vb_stride * 0 + 1] = vin[1]; + vout[vb_stride * 1] = vin[0]; + vout[vb_stride * 1 + 1] = vin[array_stride + 1]; + vout[vb_stride * 2] = vin[array_stride]; + vout[vb_stride * 2 + 1] = vin[array_stride + 1]; + vout[vb_stride * 3] = vin[array_stride]; + vout[vb_stride * 3 + 1] = vin[1]; + } + else + { + float v[8]; + + v[0] = vin[0]; + v[1] = vin[1]; + v[2] = vin[0]; + v[3] = vin[array_stride + 1]; + v[4] = vin[array_stride]; + v[5] = vin[array_stride + 1]; + v[6] = vin[array_stride]; + v[7] = vin[1]; + + if (entry->modelview_entry != last_modelview_entry) + cogl_matrix_entry_get (entry->modelview_entry, &modelview); + cogl_matrix_transform_points (&modelview, + 2, /* n_components */ + sizeof (float) * 2, /* stride_in */ + v, /* points_in */ + /* strideout */ + vb_stride * sizeof (float), + vout, /* points_out */ + 4 /* n_points */); + } + + for (i = 0; i < entry->n_layers; i++) + { + const float *tin = vin + 2; + float *tout = vout + POS_STRIDE + COLOR_STRIDE; + + tout[vb_stride * 0 + i * 2] = tin[i * 2]; + tout[vb_stride * 0 + 1 + i * 2] = tin[i * 2 + 1]; + tout[vb_stride * 1 + i * 2] = tin[i * 2]; + tout[vb_stride * 1 + 1 + i * 2] = tin[array_stride + i * 2 + 1]; + tout[vb_stride * 2 + i * 2] = tin[array_stride + i * 2]; + tout[vb_stride * 2 + 1 + i * 2] = tin[array_stride + i * 2 + 1]; + tout[vb_stride * 3 + i * 2] = tin[array_stride + i * 2]; + tout[vb_stride * 3 + 1 + i * 2] = tin[i * 2 + 1]; + } + + vin += array_stride * 2; + vout += vb_stride * 4; + } 
+ + _cogl_buffer_unmap_for_fill_or_fallback (buffer); + + return attribute_buffer; +} + +void +_cogl_journal_discard (CoglJournal *journal) +{ + int i; + + if (journal->entries->len <= 0) + return; + + for (i = 0; i < journal->entries->len; i++) + { + CoglJournalEntry *entry = + &g_array_index (journal->entries, CoglJournalEntry, i); + _cogl_pipeline_journal_unref (entry->pipeline); + cogl_matrix_entry_unref (entry->modelview_entry); + _cogl_clip_stack_unref (entry->clip_stack); + } + + g_array_set_size (journal->entries, 0); + g_array_set_size (journal->vertices, 0); + journal->needed_vbo_len = 0; + journal->fast_read_pixel_count = 0; + + /* The journal only holds a reference to the framebuffer while the + journal is not empty */ + cogl_object_unref (journal->framebuffer); +} + +/* Note: A return value of FALSE doesn't mean 'no' it means + * 'unknown' */ +CoglBool +_cogl_journal_all_entries_within_bounds (CoglJournal *journal, + float clip_x0, + float clip_y0, + float clip_x1, + float clip_y1) +{ + CoglJournalEntry *entry = (CoglJournalEntry *)journal->entries->data; + CoglClipStack *clip_entry; + CoglClipStack *reference = NULL; + int bounds_x0; + int bounds_y0; + int bounds_x1; + int bounds_y1; + int i; + + if (journal->entries->len == 0) + return TRUE; + + /* Find the shortest clip_stack ancestry that leaves us in the + * required bounds */ + for (clip_entry = entry->clip_stack; + clip_entry; + clip_entry = clip_entry->parent) + { + _cogl_clip_stack_get_bounds (clip_entry, + &bounds_x0, &bounds_y0, + &bounds_x1, &bounds_y1); + + if (bounds_x0 >= clip_x0 && bounds_y0 >= clip_y0 && + bounds_x1 <= clip_x1 && bounds_y1 <= clip_y1) + reference = clip_entry; + else + break; + } + + if (!reference) + return FALSE; + + /* For the remaining journal entries we will only verify they share + * 'reference' as an ancestor in their clip stack since that's + * enough to know that they would be within the required bounds. 
   */
  for (i = 1; i < journal->entries->len; i++)
    {
      CoglBool found_reference = FALSE;
      entry = &g_array_index (journal->entries, CoglJournalEntry, i);

      for (clip_entry = entry->clip_stack;
           clip_entry;
           clip_entry = clip_entry->parent)
        {
          if (clip_entry == reference)
            {
              found_reference = TRUE;
              break;
            }
        }

      if (!found_reference)
        return FALSE;
    }

  return TRUE;
}

/* Submits and removes every fence closure queued on the journal; called
 * once a flush has completed (or immediately when there is nothing to
 * flush). */
static void
post_fences (CoglJournal *journal)
{
  CoglFenceClosure *fence, *tmp;

  _cogl_list_for_each_safe (fence, tmp, &journal->pending_fences, link)
    {
      _cogl_list_remove (&fence->link);
      _cogl_fence_submit (fence);
    }
}

/* XXX NB: When _cogl_journal_flush() returns all state relating
 * to pipelines, all glEnable flags and current matrix state
 * is undefined.
 */
void
_cogl_journal_flush (CoglJournal *journal)
{
  CoglFramebuffer *framebuffer;
  CoglContext *ctx;
  CoglJournalFlushState state;
  int i;
  COGL_STATIC_TIMER (flush_timer,
                     "Mainloop", /* parent */
                     "Journal Flush",
                     "The time spent flushing the Cogl journal",
                     0 /* no application private data */);
  COGL_STATIC_TIMER (discard_timer,
                     "Journal Flush", /* parent */
                     "flush: discard",
                     "The time spent discarding the Cogl journal after a flush",
                     0 /* no application private data */);

  if (journal->entries->len == 0)
    {
      post_fences (journal);
      return;
    }

  framebuffer = journal->framebuffer;
  ctx = framebuffer->context;

  /* The entries in this journal may depend on images in other
   * framebuffers which may require that we flush the journals
   * associated with those framebuffers before we can flush
   * this journal... */
  _cogl_framebuffer_flush_dependency_journals (framebuffer);

  /* Note: we start the timer after flushing dependency journals so
   * that the timer isn't started recursively.
   */
  COGL_TIMER_START (_cogl_uprof_context, flush_timer);

  if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_BATCHING)))
    g_print ("BATCHING: journal len = %d\n", journal->entries->len);

  /* NB: the journal deals with flushing the modelview stack and clip
     state manually */
  _cogl_framebuffer_flush_state (framebuffer,
                                 framebuffer,
                                 COGL_FRAMEBUFFER_STATE_ALL &
                                 ~(COGL_FRAMEBUFFER_STATE_MODELVIEW |
                                   COGL_FRAMEBUFFER_STATE_CLIP));

  /* We need to mark the current modelview state of the framebuffer as
   * dirty because we are going to manually replace it */
  ctx->current_draw_buffer_changes |= COGL_FRAMEBUFFER_STATE_MODELVIEW;

  state.ctx = ctx;
  state.journal = journal;

  state.attributes = ctx->journal_flush_attributes_array;

  if (G_UNLIKELY ((COGL_DEBUG_ENABLED (COGL_DEBUG_DISABLE_SOFTWARE_CLIP)) == 0))
    {
      /* We do an initial walk of the journal to analyse the clip stack
         batches to see if we can do software clipping. We do this as a
         separate walk of the journal because we can modify entries and
         this may end up joining together clip stack batches in the next
         iteration. */
      batch_and_call ((CoglJournalEntry *)journal->entries->data, /* first entry */
                      journal->entries->len, /* max number of entries to consider */
                      compare_entry_clip_stacks,
                      _cogl_journal_maybe_software_clip_entries, /* callback */
                      &state); /* data */
    }

  /* We upload the vertices after the clip stack pass in case it
     modifies the entries */
  state.attribute_buffer =
    upload_vertices (journal,
                     &g_array_index (journal->entries, CoglJournalEntry, 0),
                     journal->entries->len,
                     journal->needed_vbo_len,
                     journal->vertices);
  state.array_offset = 0;

  /* batch_and_call() batches a list of journal entries according to some
   * given criteria and calls a callback once for each determined batch.
   *
   * The process of flushing the journal is staggered to reduce the amount
   * of driver/GPU state changes necessary:
   * 1) We split the entries according to the clip state.
   * 2) We split the entries according to the stride of the vertices:
   *    Each time the stride of our vertex data changes we need to call
   *    gl{Vertex,Color}Pointer to inform GL of new VBO offsets.
   *    Currently the only thing that affects the stride of our vertex data
   *    is the number of pipeline layers.
   * 3) We split the entries explicitly by the number of pipeline layers:
   *    We pad our vertex data when the number of layers is < 2 so that we
   *    can minimize changes in stride. Each time the number of layers
   *    changes we need to call glTexCoordPointer to inform GL of new VBO
   *    offsets.
   * 4) We then split according to compatible Cogl pipelines:
   *    This is where we flush pipeline state
   * 5) Finally we split according to modelview matrix changes:
   *    This is when we finally tell GL to draw something.
   *    Note: Splitting by modelview changes is skipped when are doing the
   *    vertex transformation in software at log time.
   */
  batch_and_call ((CoglJournalEntry *)journal->entries->data, /* first entry */
                  journal->entries->len, /* max number of entries to consider */
                  compare_entry_clip_stacks,
                  _cogl_journal_flush_clip_stacks_and_entries, /* callback */
                  &state); /* data */

  for (i = 0; i < state.attributes->len; i++)
    cogl_object_unref (g_array_index (state.attributes, CoglAttribute *, i));
  g_array_set_size (state.attributes, 0);

  cogl_object_unref (state.attribute_buffer);

  COGL_TIMER_START (_cogl_uprof_context, discard_timer);
  _cogl_journal_discard (journal);
  COGL_TIMER_STOP (_cogl_uprof_context, discard_timer);

  post_fences (journal);

  COGL_TIMER_STOP (_cogl_uprof_context, flush_timer);
}

/* Pipeline-layer iteration callback: makes 'framebuffer' (user_data)
 * depend on every framebuffer associated with the layer's texture, so
 * those framebuffers' journals get flushed before this one draws from
 * their contents. Always returns TRUE to keep iterating. */
static CoglBool
add_framebuffer_deps_cb (CoglPipelineLayer *layer, void *user_data)
{
  CoglFramebuffer *framebuffer = user_data;
  CoglTexture *texture = _cogl_pipeline_layer_get_texture_real (layer);
  const GList *l;

  if (!texture)
    return TRUE;

  for (l = _cogl_texture_get_associated_framebuffers (texture); l; l = l->next)
    _cogl_framebuffer_add_dependency (framebuffer, l->data);

  return TRUE;
}

/* Appends one textured quad to the journal without drawing it.
 * position is { x0, y0, x1, y1 }; tex_coords holds 4 floats per layer.
 * Note: tex_coords_len appears unused in this function body — presumably
 * kept for API symmetry with callers; verify before relying on it. */
void
_cogl_journal_log_quad (CoglJournal *journal,
                        const float *position,
                        CoglPipeline *pipeline,
                        int n_layers,
                        CoglTexture *layer0_override_texture,
                        const float *tex_coords,
                        unsigned int tex_coords_len)
{
  CoglFramebuffer *framebuffer = journal->framebuffer;
  size_t stride;
  int next_vert;
  float *v;
  int i;
  int next_entry;
  uint32_t disable_layers;
  CoglJournalEntry *entry;
  CoglPipeline *final_pipeline;
  CoglClipStack *clip_stack;
  CoglPipelineFlushOptions flush_options;
  CoglMatrixStack *modelview_stack;
  COGL_STATIC_TIMER (log_timer,
                     "Mainloop", /* parent */
                     "Journal Log",
                     "The time spent logging in the Cogl journal",
                     0 /* no application private data */);

  COGL_TIMER_START (_cogl_uprof_context, log_timer);

  /* Adding something to the journal should mean that we are in the
   * middle of the scene. 
Although this will also end up being set
   * when the journal is actually flushed, we set it here explicitly
   * so that we will know sooner */
  _cogl_framebuffer_mark_mid_scene (framebuffer);

  /* If the framebuffer was previously empty then we'll take a
     reference to the current framebuffer. This reference will be
     removed when the journal is flushed */
  if (journal->vertices->len == 0)
    cogl_object_ref (framebuffer);

  /* The vertex data is logged into a separate array. The data needs
     to be copied into a vertex array before it's given to GL so we
     only store two vertices per quad and expand it to four while
     uploading. */

  /* XXX: See definition of GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS for details
   * about how we pack our vertex data */
  stride = GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS (n_layers);

  next_vert = journal->vertices->len;
  /* + 1 makes room for the packed 32bit RGBA color word that precedes
   * the two logged corner vertices */
  g_array_set_size (journal->vertices, next_vert + 2 * stride + 1);
  v = &g_array_index (journal->vertices, float, next_vert);

  /* We calculate the needed size of the vbo as we go because it
     depends on the number of layers in each entry and it's not easy
     calculate based on the length of the logged vertices array */
  journal->needed_vbo_len += GET_JOURNAL_VB_STRIDE_FOR_N_LAYERS (n_layers) * 4;

  /* XXX: All the jumping around to fill in this strided buffer doesn't
   * seem ideal.
 */

  /* FIXME: This is a hacky optimization, since it will break if we
   * change the definition of CoglColor: */
  _cogl_pipeline_get_colorubv (pipeline, (uint8_t *) v);
  v++;

  /* Log the top-left position at v and the bottom-right at v + stride */
  memcpy (v, position, sizeof (float) * 2);
  memcpy (v + stride, position + 2, sizeof (float) * 2);

  for (i = 0; i < n_layers; i++)
    {
      /* XXX: See definition of GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS
       * for details about how we pack our vertex data */
      GLfloat *t = v + 2 + i * 2;

      memcpy (t, tex_coords + i * 4, sizeof (float) * 2);
      memcpy (t + stride, tex_coords + i * 4 + 2, sizeof (float) * 2);
    }

  if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_JOURNAL)))
    {
      g_print ("Logged new quad:\n");
      v = &g_array_index (journal->vertices, float, next_vert);
      _cogl_journal_dump_logged_quad ((uint8_t *)v, n_layers);
    }

  next_entry = journal->entries->len;
  g_array_set_size (journal->entries, next_entry + 1);
  entry = &g_array_index (journal->entries, CoglJournalEntry, next_entry);

  entry->n_layers = n_layers;
  entry->array_offset = next_vert;

  final_pipeline = pipeline;

  /* If the pipeline has more layers than requested, or a layer0
   * override texture was given, flush those overrides into a private
   * copy of the pipeline */
  flush_options.flags = 0;
  if (G_UNLIKELY (cogl_pipeline_get_n_layers (pipeline) != n_layers))
    {
      disable_layers = (1 << n_layers) - 1;
      disable_layers = ~disable_layers;
      flush_options.disable_layers = disable_layers;
      flush_options.flags |= COGL_PIPELINE_FLUSH_DISABLE_MASK;
    }
  if (G_UNLIKELY (layer0_override_texture))
    {
      flush_options.flags |= COGL_PIPELINE_FLUSH_LAYER0_OVERRIDE;
      flush_options.layer0_override_texture = layer0_override_texture;
    }

  if (G_UNLIKELY (flush_options.flags))
    {
      final_pipeline = cogl_pipeline_copy (pipeline);
      _cogl_pipeline_apply_overrides (final_pipeline, &flush_options);
    }

  entry->pipeline = _cogl_pipeline_journal_ref (final_pipeline);

  clip_stack = _cogl_framebuffer_get_clip_stack (framebuffer);
  entry->clip_stack = _cogl_clip_stack_ref (clip_stack);

  /* The journal ref above keeps a copied pipeline alive; drop our local
   * reference from cogl_pipeline_copy() */
  if (G_UNLIKELY (final_pipeline != pipeline))
    cogl_object_unref (final_pipeline);

  modelview_stack =
    _cogl_framebuffer_get_modelview_stack (framebuffer);
  entry->modelview_entry = cogl_matrix_entry_ref (modelview_stack->last_entry);

  _cogl_pipeline_foreach_layer_internal (pipeline,
                                         add_framebuffer_deps_cb,
                                         framebuffer);

  if (G_UNLIKELY (COGL_DEBUG_ENABLED (COGL_DEBUG_DISABLE_BATCHING)))
    _cogl_journal_flush (journal);

  COGL_TIMER_STOP (_cogl_uprof_context, log_timer);
}

/* Computes the window-space polygon covered by a journal entry's quad:
 * applies the entry's modelview, the framebuffer's projection and the
 * viewport transform. vertices points at the entry's two logged corner
 * positions; poly receives 4 (x, y, z, w) vertices (16 floats). */
static void
entry_to_screen_polygon (CoglFramebuffer *framebuffer,
                         const CoglJournalEntry *entry,
                         float *vertices,
                         float *poly)
{
  size_t array_stride =
    GET_JOURNAL_ARRAY_STRIDE_FOR_N_LAYERS (entry->n_layers);
  CoglMatrixStack *projection_stack;
  CoglMatrix projection;
  CoglMatrix modelview;
  int i;
  float viewport[4];

  /* Expand the two logged corners into the quad's four (x, y, 0, 1)
   * homogeneous vertices */
  poly[0] = vertices[0];
  poly[1] = vertices[1];
  poly[2] = 0;
  poly[3] = 1;

  poly[4] = vertices[0];
  poly[5] = vertices[array_stride + 1];
  poly[6] = 0;
  poly[7] = 1;

  poly[8] = vertices[array_stride];
  poly[9] = vertices[array_stride + 1];
  poly[10] = 0;
  poly[11] = 1;

  poly[12] = vertices[array_stride];
  poly[13] = vertices[1];
  poly[14] = 0;
  poly[15] = 1;

  /* TODO: perhaps split the following out into a more generalized
   * _cogl_transform_points utility...
+ */ + + cogl_matrix_entry_get (entry->modelview_entry, &modelview); + cogl_matrix_transform_points (&modelview, + 2, /* n_components */ + sizeof (float) * 4, /* stride_in */ + poly, /* points_in */ + /* strideout */ + sizeof (float) * 4, + poly, /* points_out */ + 4 /* n_points */); + + projection_stack = + _cogl_framebuffer_get_projection_stack (framebuffer); + cogl_matrix_stack_get (projection_stack, &projection); + + cogl_matrix_project_points (&projection, + 3, /* n_components */ + sizeof (float) * 4, /* stride_in */ + poly, /* points_in */ + /* strideout */ + sizeof (float) * 4, + poly, /* points_out */ + 4 /* n_points */); + + cogl_framebuffer_get_viewport4fv (framebuffer, viewport); + +/* Scale from OpenGL normalized device coordinates (ranging from -1 to 1) + * to Cogl window/framebuffer coordinates (ranging from 0 to buffer-size) with + * (0,0) being top left. */ +#define VIEWPORT_TRANSFORM_X(x, vp_origin_x, vp_width) \ + ( ( ((x) + 1.0) * ((vp_width) / 2.0) ) + (vp_origin_x) ) +/* Note: for Y we first flip all coordinates around the X axis while in + * normalized device coodinates */ +#define VIEWPORT_TRANSFORM_Y(y, vp_origin_y, vp_height) \ + ( ( ((-(y)) + 1.0) * ((vp_height) / 2.0) ) + (vp_origin_y) ) + + /* Scale from normalized device coordinates (in range [-1,1]) to + * window coordinates ranging [0,window-size] ... 
*/ + for (i = 0; i < 4; i++) + { + float w = poly[4 * i + 3]; + + /* Perform perspective division */ + poly[4 * i] /= w; + poly[4 * i + 1] /= w; + + /* Apply viewport transform */ + poly[4 * i] = VIEWPORT_TRANSFORM_X (poly[4 * i], + viewport[0], viewport[2]); + poly[4 * i + 1] = VIEWPORT_TRANSFORM_Y (poly[4 * i + 1], + viewport[1], viewport[3]); + } + +#undef VIEWPORT_TRANSFORM_X +#undef VIEWPORT_TRANSFORM_Y +} + +static CoglBool +try_checking_point_hits_entry_after_clipping (CoglFramebuffer *framebuffer, + CoglJournalEntry *entry, + float *vertices, + float x, + float y, + CoglBool *hit) +{ + CoglBool can_software_clip = TRUE; + CoglBool needs_software_clip = FALSE; + CoglClipStack *clip_entry; + + *hit = TRUE; + + /* Verify that all of the clip stack entries are simple rectangle + * clips */ + for (clip_entry = entry->clip_stack; + clip_entry; + clip_entry = clip_entry->parent) + { + if (x < clip_entry->bounds_x0 || + x >= clip_entry->bounds_x1 || + y < clip_entry->bounds_y0 || + y >= clip_entry->bounds_y1) + { + *hit = FALSE; + return TRUE; + } + + if (clip_entry->type == COGL_CLIP_STACK_WINDOW_RECT) + { + /* XXX: technically we could still run the software clip in + * this case because for our purposes we know this clip + * can be ignored now, but [can_]sofware_clip_entry() doesn't + * know this and will bail out. */ + can_software_clip = FALSE; + } + else if (clip_entry->type == COGL_CLIP_STACK_RECT) + { + CoglClipStackRect *rect_entry = (CoglClipStackRect *)entry; + + if (rect_entry->can_be_scissor == FALSE) + needs_software_clip = TRUE; + /* If can_be_scissor is TRUE then we know it's screen + * aligned and the hit test we did above has determined + * that we are inside this clip. 
*/ + } + else + return FALSE; + } + + if (needs_software_clip) + { + ClipBounds clip_bounds; + float poly[16]; + + if (!can_software_clip) + return FALSE; + + if (!can_software_clip_entry (entry, NULL, + entry->clip_stack, &clip_bounds)) + return FALSE; + + software_clip_entry (entry, vertices, &clip_bounds); + entry_to_screen_polygon (framebuffer, entry, vertices, poly); + + *hit = _cogl_util_point_in_screen_poly (x, y, poly, sizeof (float) * 4, 4); + return TRUE; + } + + return TRUE; +} + +CoglBool +_cogl_journal_try_read_pixel (CoglJournal *journal, + int x, + int y, + CoglBitmap *bitmap, + CoglBool *found_intersection) +{ + CoglContext *ctx; + CoglPixelFormat format; + int i; + + /* XXX: this number has been plucked out of thin air, but the idea + * is that if so many pixels are being read from the same un-changed + * journal than we expect that it will be more efficient to fail + * here so we end up flushing and rendering the journal so that + * further reads can directly read from the framebuffer. There will + * be a bit more lag to flush the render but if there are going to + * continue being lots of arbitrary single pixel reads they will end + * up faster in the end. */ + if (journal->fast_read_pixel_count > 50) + return FALSE; + + format = cogl_bitmap_get_format (bitmap); + + if (format != COGL_PIXEL_FORMAT_RGBA_8888_PRE && + format != COGL_PIXEL_FORMAT_RGBA_8888) + return FALSE; + + ctx = _cogl_bitmap_get_context (bitmap); + + *found_intersection = FALSE; + + /* NB: The most recently added journal entry is the last entry, and + * assuming this is a simple scene only comprised of opaque coloured + * rectangles with no special pipelines involved (e.g. enabling + * depth testing) then we can assume painter's algorithm for the + * entries and so our fast read-pixel just needs to walk backwards + * through the journal entries trying to intersect each entry with + * the given point of interest. 
*/ + for (i = journal->entries->len - 1; i >= 0; i--) + { + CoglJournalEntry *entry = + &g_array_index (journal->entries, CoglJournalEntry, i); + uint8_t *color = (uint8_t *)&g_array_index (journal->vertices, float, + entry->array_offset); + float *vertices = (float *)color + 1; + float poly[16]; + CoglFramebuffer *framebuffer = journal->framebuffer; + uint8_t *pixel; + CoglError *ignore_error; + + entry_to_screen_polygon (framebuffer, entry, vertices, poly); + + if (!_cogl_util_point_in_screen_poly (x, y, poly, sizeof (float) * 4, 4)) + continue; + + if (entry->clip_stack) + { + CoglBool hit; + + if (!try_checking_point_hits_entry_after_clipping (framebuffer, + entry, + vertices, + x, y, &hit)) + return FALSE; /* hit couldn't be determined */ + + if (!hit) + continue; + } + + *found_intersection = TRUE; + + /* If we find that the rectangle the point of interest + * intersects has any state more complex than a constant opaque + * color then we bail out. */ + if (!_cogl_pipeline_equal (ctx->opaque_color_pipeline, entry->pipeline, + (COGL_PIPELINE_STATE_ALL & + ~COGL_PIPELINE_STATE_COLOR), + COGL_PIPELINE_LAYER_STATE_ALL, + 0)) + return FALSE; + + + /* we currently only care about cases where the premultiplied or + * unpremultipled colors are equivalent... */ + if (color[3] != 0xff) + return FALSE; + + pixel = _cogl_bitmap_map (bitmap, + COGL_BUFFER_ACCESS_WRITE, + COGL_BUFFER_MAP_HINT_DISCARD, + &ignore_error); + if (pixel == NULL) + { + cogl_error_free (ignore_error); + return FALSE; + } + + pixel[0] = color[0]; + pixel[1] = color[1]; + pixel[2] = color[2]; + pixel[3] = color[3]; + + _cogl_bitmap_unmap (bitmap); + + goto success; + } + +success: + journal->fast_read_pixel_count++; + return TRUE; +} |