diff options
Diffstat (limited to 'chromium/cc/raster')
26 files changed, 627 insertions, 1957 deletions
diff --git a/chromium/cc/raster/bitmap_raster_buffer_provider.cc b/chromium/cc/raster/bitmap_raster_buffer_provider.cc index 12eaae7efa7..1e4e21bb134 100644 --- a/chromium/cc/raster/bitmap_raster_buffer_provider.cc +++ b/chromium/cc/raster/bitmap_raster_buffer_provider.cc @@ -164,4 +164,8 @@ uint64_t BitmapRasterBufferProvider::SetReadyToDrawCallback( void BitmapRasterBufferProvider::Shutdown() {} +bool BitmapRasterBufferProvider::CheckRasterFinishedQueries() { + return false; +} + } // namespace cc diff --git a/chromium/cc/raster/bitmap_raster_buffer_provider.h b/chromium/cc/raster/bitmap_raster_buffer_provider.h index 5174176fd11..9ceb0012b23 100644 --- a/chromium/cc/raster/bitmap_raster_buffer_provider.h +++ b/chromium/cc/raster/bitmap_raster_buffer_provider.h @@ -43,6 +43,7 @@ class CC_EXPORT BitmapRasterBufferProvider : public RasterBufferProvider { const base::Closure& callback, uint64_t pending_callback_id) const override; void Shutdown() override; + bool CheckRasterFinishedQueries() override; private: std::unique_ptr<base::trace_event::ConvertableToTraceFormat> StateAsValue() diff --git a/chromium/cc/raster/gpu_raster_buffer_provider.cc b/chromium/cc/raster/gpu_raster_buffer_provider.cc index 3db004cf645..2b9b386ee08 100644 --- a/chromium/cc/raster/gpu_raster_buffer_provider.cc +++ b/chromium/cc/raster/gpu_raster_buffer_provider.cc @@ -10,6 +10,8 @@ #include "base/macros.h" #include "base/metrics/histogram_macros.h" +#include "base/rand_util.h" +#include "base/strings/stringprintf.h" #include "base/trace_event/process_memory_dump.h" #include "base/trace_event/trace_event.h" #include "cc/base/histograms.h" @@ -28,13 +30,14 @@ #include "gpu/command_buffer/client/context_support.h" #include "gpu/command_buffer/client/gles2_interface.h" #include "gpu/command_buffer/client/raster_interface.h" -#include "gpu/command_buffer/common/gpu_memory_buffer_support.h" +#include "gpu/command_buffer/client/shared_image_interface.h" +#include 
"gpu/command_buffer/common/shared_image_trace_utils.h" +#include "gpu/command_buffer/common/shared_image_usage.h" #include "third_party/skia/include/core/SkMultiPictureDraw.h" #include "third_party/skia/include/core/SkPictureRecorder.h" #include "third_party/skia/include/core/SkSurface.h" #include "third_party/skia/include/gpu/GrContext.h" #include "ui/gfx/geometry/axis_transform2d.h" -#include "ui/gl/trace_util.h" #include "url/gurl.h" namespace cc { @@ -114,10 +117,10 @@ class ScopedSkSurfaceForUnpremultiplyAndDither { static void RasterizeSourceOOP( const RasterSource* raster_source, bool resource_has_previous_content, - const gpu::Mailbox& mailbox, + gpu::Mailbox* mailbox, + const gpu::SyncToken& sync_token, GLenum texture_target, bool texture_is_overlay_candidate, - bool texture_storage_allocated, const gfx::Size& resource_size, viz::ResourceFormat resource_format, const gfx::ColorSpace& color_space, @@ -128,16 +131,22 @@ static void RasterizeSourceOOP( viz::RasterContextProvider* context_provider, int msaa_sample_count) { gpu::raster::RasterInterface* ri = context_provider->RasterInterface(); + if (mailbox->IsZero()) { + DCHECK(!sync_token.HasData()); + auto* sii = context_provider->SharedImageInterface(); + uint32_t flags = gpu::SHARED_IMAGE_USAGE_RASTER; + if (texture_is_overlay_candidate) + flags |= gpu::SHARED_IMAGE_USAGE_SCANOUT; + *mailbox = sii->CreateSharedImage(resource_format, resource_size, + color_space, flags); + ri->WaitSyncTokenCHROMIUM(sii->GenUnverifiedSyncToken().GetConstData()); + } else { + ri->WaitSyncTokenCHROMIUM(sync_token.GetConstData()); + } + GLuint texture_id = ri->CreateAndConsumeTexture( texture_is_overlay_candidate, gfx::BufferUsage::SCANOUT, resource_format, - mailbox.name); - if (!texture_storage_allocated) { - viz::TextureAllocation alloc = {texture_id, texture_target, - texture_is_overlay_candidate}; - viz::TextureAllocation::AllocateStorage( - ri, context_provider->ContextCapabilities(), resource_format, - resource_size, 
alloc, color_space); - } + mailbox->name); // TODO(enne): Use the |texture_target|? GpuMemoryBuffer backed textures don't // use GL_TEXTURE_2D. @@ -145,10 +154,11 @@ static void RasterizeSourceOOP( playback_settings.use_lcd_text, viz::ResourceFormatToClosestSkColorType( /*gpu_compositing=*/true, resource_format), - playback_settings.raster_color_space, mailbox.name); + playback_settings.raster_color_space, mailbox->name); float recording_to_raster_scale = transform.scale() / raster_source->recording_scale_factor(); gfx::Size content_size = raster_source->GetContentSize(transform.scale()); + // TODO(enne): could skip the clear on new textures, as the service side has // to do that anyway. resource_has_previous_content implies that the texture // is not new, but the reverse does not hold, so more plumbing is needed. @@ -168,10 +178,10 @@ static void RasterizeSourceOOP( static void RasterizeSource( const RasterSource* raster_source, bool resource_has_previous_content, - const gpu::Mailbox& mailbox, + gpu::Mailbox* mailbox, + const gpu::SyncToken& sync_token, GLenum texture_target, bool texture_is_overlay_candidate, - bool texture_storage_allocated, const gfx::Size& resource_size, viz::ResourceFormat resource_format, const gfx::ColorSpace& color_space, @@ -184,17 +194,24 @@ static void RasterizeSource( bool unpremultiply_and_dither, const gfx::Size& max_tile_size) { gpu::raster::RasterInterface* ri = context_provider->RasterInterface(); + if (mailbox->IsZero()) { + auto* sii = context_provider->SharedImageInterface(); + uint32_t flags = gpu::SHARED_IMAGE_USAGE_GLES2 | + gpu::SHARED_IMAGE_USAGE_GLES2_FRAMEBUFFER_HINT; + if (texture_is_overlay_candidate) + flags |= gpu::SHARED_IMAGE_USAGE_SCANOUT; + *mailbox = sii->CreateSharedImage(resource_format, resource_size, + color_space, flags); + ri->WaitSyncTokenCHROMIUM(sii->GenUnverifiedSyncToken().GetConstData()); + } else { + // Wait on the SyncToken that was created on the compositor thread after + // making the mailbox. 
This ensures that the mailbox we consume here is + // valid by the time the consume command executes. + ri->WaitSyncTokenCHROMIUM(sync_token.GetConstData()); + } GLuint texture_id = ri->CreateAndConsumeTexture( texture_is_overlay_candidate, gfx::BufferUsage::SCANOUT, resource_format, - mailbox.name); - if (!texture_storage_allocated) { - viz::TextureAllocation alloc = {texture_id, texture_target, - texture_is_overlay_candidate}; - viz::TextureAllocation::AllocateStorage( - ri, context_provider->ContextCapabilities(), resource_format, - resource_size, alloc, color_space); - } - + mailbox->name); { ScopedGrContextAccess gr_context_access(context_provider); base::Optional<viz::ClientResourceProvider::ScopedSkSurface> scoped_surface; @@ -244,13 +261,13 @@ class GpuRasterBufferProvider::GpuRasterBacking : public ResourcePool::GpuBacking { public: ~GpuRasterBacking() override { - gpu::gles2::GLES2Interface* gl = compositor_context_provider->ContextGL(); + if (mailbox.IsZero()) + return; + auto* sii = worker_context_provider->SharedImageInterface(); if (returned_sync_token.HasData()) - gl->WaitSyncTokenCHROMIUM(returned_sync_token.GetConstData()); - if (mailbox_sync_token.HasData()) - gl->WaitSyncTokenCHROMIUM(mailbox_sync_token.GetConstData()); - if (texture_id) - gl->DeleteTextures(1, &texture_id); + sii->DestroySharedImage(returned_sync_token, mailbox); + else if (mailbox_sync_token.HasData()) + sii->DestroySharedImage(mailbox_sync_token, mailbox); } void OnMemoryDump( @@ -258,23 +275,16 @@ class GpuRasterBufferProvider::GpuRasterBacking const base::trace_event::MemoryAllocatorDumpGuid& buffer_dump_guid, uint64_t tracing_process_id, int importance) const override { - if (!storage_allocated) + if (mailbox.IsZero()) return; - auto texture_tracing_guid = gl::GetGLTextureClientGUIDForTracing( - compositor_context_provider->ContextSupport()->ShareGroupTracingGUID(), - texture_id); - pmd->CreateSharedGlobalAllocatorDump(texture_tracing_guid); - 
pmd->AddOwnershipEdge(buffer_dump_guid, texture_tracing_guid, importance); + auto tracing_guid = gpu::GetSharedImageGUIDForTracing(mailbox); + pmd->CreateSharedGlobalAllocatorDump(tracing_guid); + pmd->AddOwnershipEdge(buffer_dump_guid, tracing_guid, importance); } - // The ContextProvider used to clean up the texture id. - viz::ContextProvider* compositor_context_provider = nullptr; - // The texture backing of the resource. - GLuint texture_id = 0; - // The allocation of storage for the |texture_id| is deferred, and this tracks - // if it has been done. - bool storage_allocated = false; + // The ContextProvider used to clean up the mailbox + viz::RasterContextProvider* worker_context_provider = nullptr; }; GpuRasterBufferProvider::RasterBufferImpl::RasterBufferImpl( @@ -289,10 +299,9 @@ GpuRasterBufferProvider::RasterBufferImpl::RasterBufferImpl( color_space_(in_use_resource.color_space()), resource_has_previous_content_(resource_has_previous_content), before_raster_sync_token_(backing->returned_sync_token), - mailbox_(backing->mailbox), texture_target_(backing->texture_target), texture_is_overlay_candidate_(backing->overlay_candidate), - texture_storage_allocated_(backing->storage_allocated) {} + mailbox_(backing->mailbox) {} GpuRasterBufferProvider::RasterBufferImpl::~RasterBufferImpl() { // This SyncToken was created on the worker context after rastering the @@ -303,7 +312,7 @@ GpuRasterBufferProvider::RasterBufferImpl::~RasterBufferImpl() { // happened if the |after_raster_sync_token_| was set. backing_->returned_sync_token = gpu::SyncToken(); } - backing_->storage_allocated = texture_storage_allocated_; + backing_->mailbox = mailbox_; } void GpuRasterBufferProvider::RasterBufferImpl::Playback( @@ -320,12 +329,10 @@ void GpuRasterBufferProvider::RasterBufferImpl::Playback( // returns another SyncToken generated on the worker thread to synchronize // with after the raster is complete. 
after_raster_sync_token_ = client_->PlaybackOnWorkerThread( - mailbox_, texture_target_, texture_is_overlay_candidate_, - texture_storage_allocated_, before_raster_sync_token_, resource_size_, - resource_format_, color_space_, resource_has_previous_content_, - raster_source, raster_full_rect, raster_dirty_rect, new_content_id, - transform, playback_settings, url); - texture_storage_allocated_ = true; + &mailbox_, texture_target_, texture_is_overlay_candidate_, + before_raster_sync_token_, resource_size_, resource_format_, color_space_, + resource_has_previous_content_, raster_source, raster_full_rect, + raster_dirty_rect, new_content_id, transform, playback_settings, url); } GpuRasterBufferProvider::GpuRasterBufferProvider( @@ -336,7 +343,8 @@ GpuRasterBufferProvider::GpuRasterBufferProvider( viz::ResourceFormat tile_format, const gfx::Size& max_tile_size, bool unpremultiply_and_dither_low_bit_depth_tiles, - bool enable_oop_rasterization) + bool enable_oop_rasterization, + int raster_metric_frequency) : compositor_context_provider_(compositor_context_provider), worker_context_provider_(worker_context_provider), use_gpu_memory_buffer_resources_(use_gpu_memory_buffer_resources), @@ -345,7 +353,10 @@ GpuRasterBufferProvider::GpuRasterBufferProvider( max_tile_size_(max_tile_size), unpremultiply_and_dither_low_bit_depth_tiles_( unpremultiply_and_dither_low_bit_depth_tiles), - enable_oop_rasterization_(enable_oop_rasterization) { + enable_oop_rasterization_(enable_oop_rasterization), + raster_metric_frequency_(raster_metric_frequency), + random_generator_(base::RandUint64()), + uniform_distribution_(1, raster_metric_frequency) { DCHECK(compositor_context_provider); DCHECK(worker_context_provider); } @@ -359,24 +370,10 @@ std::unique_ptr<RasterBuffer> GpuRasterBufferProvider::AcquireBufferForRaster( uint64_t previous_content_id) { if (!resource.gpu_backing()) { auto backing = std::make_unique<GpuRasterBacking>(); - backing->compositor_context_provider = 
compositor_context_provider_; - - gpu::gles2::GLES2Interface* gl = compositor_context_provider_->ContextGL(); - const auto& caps = compositor_context_provider_->ContextCapabilities(); - - viz::TextureAllocation alloc = viz::TextureAllocation::MakeTextureId( - gl, caps, resource.format(), use_gpu_memory_buffer_resources_, - /*for_framebuffer_attachment=*/true); - backing->texture_id = alloc.texture_id; - backing->texture_target = alloc.texture_target; - backing->overlay_candidate = alloc.overlay_candidate; - gl->ProduceTextureDirectCHROMIUM(backing->texture_id, - backing->mailbox.name); - // Save a sync token in the backing so that we always wait on it even if - // this task is cancelled between being scheduled and running. - backing->returned_sync_token = - viz::ClientResourceProvider::GenerateSyncTokenHelper(gl); - + backing->worker_context_provider = worker_context_provider_; + backing->InitOverlayCandidateAndTextureTarget( + resource.format(), compositor_context_provider_->ContextCapabilities(), + use_gpu_memory_buffer_resources_); resource.set_gpu_backing(std::move(backing)); } GpuRasterBacking* backing = @@ -454,10 +451,9 @@ void GpuRasterBufferProvider::Shutdown() { } gpu::SyncToken GpuRasterBufferProvider::PlaybackOnWorkerThread( - const gpu::Mailbox& mailbox, + gpu::Mailbox* mailbox, GLenum texture_target, bool texture_is_overlay_candidate, - bool texture_storage_allocated, const gpu::SyncToken& sync_token, const gfx::Size& resource_size, viz::ResourceFormat resource_format, @@ -470,15 +466,50 @@ gpu::SyncToken GpuRasterBufferProvider::PlaybackOnWorkerThread( const gfx::AxisTransform2d& transform, const RasterSource::PlaybackSettings& playback_settings, const GURL& url) { + PendingRasterQuery query; + gpu::SyncToken raster_finished_token = PlaybackOnWorkerThreadInternal( + mailbox, texture_target, texture_is_overlay_candidate, sync_token, + resource_size, resource_format, color_space, + resource_has_previous_content, raster_source, raster_full_rect, + 
raster_dirty_rect, new_content_id, transform, playback_settings, url, + &query); + + if (query.query_id != 0u) { + // Note that it is important to scope the raster context lock to + // PlaybackOnWorkerThreadInternal and release it before acquiring this lock + // to avoid a deadlock in CheckRasterFinishedQueries which acquires the + // raster context lock while holding this lock. + base::AutoLock hold(pending_raster_queries_lock_); + pending_raster_queries_.push_back(query); + } + + return raster_finished_token; +} + +gpu::SyncToken GpuRasterBufferProvider::PlaybackOnWorkerThreadInternal( + gpu::Mailbox* mailbox, + GLenum texture_target, + bool texture_is_overlay_candidate, + const gpu::SyncToken& sync_token, + const gfx::Size& resource_size, + viz::ResourceFormat resource_format, + const gfx::ColorSpace& color_space, + bool resource_has_previous_content, + const RasterSource* raster_source, + const gfx::Rect& raster_full_rect, + const gfx::Rect& raster_dirty_rect, + uint64_t new_content_id, + const gfx::AxisTransform2d& transform, + const RasterSource::PlaybackSettings& playback_settings, + const GURL& url, + PendingRasterQuery* query) { viz::RasterContextProvider::ScopedRasterContextLock scoped_context( worker_context_provider_, url.possibly_invalid_spec().c_str()); gpu::raster::RasterInterface* ri = scoped_context.RasterInterface(); DCHECK(ri); - // Wait on the SyncToken that was created on the compositor thread after - // making the mailbox. This ensures that the mailbox we consume here is valid - // by the time the consume command executes. 
- ri->WaitSyncTokenCHROMIUM(sync_token.GetConstData()); + const bool measure_raster_metric = + uniform_distribution_(random_generator_) == raster_metric_frequency_; gfx::Rect playback_rect = raster_full_rect; if (resource_has_previous_content) { @@ -501,23 +532,39 @@ gpu::SyncToken GpuRasterBufferProvider::PlaybackOnWorkerThread( 100.0f * fraction_saved); } - if (enable_oop_rasterization_) { - RasterizeSourceOOP(raster_source, resource_has_previous_content, mailbox, - texture_target, texture_is_overlay_candidate, - texture_storage_allocated, resource_size, - resource_format, color_space, raster_full_rect, - playback_rect, transform, playback_settings, - worker_context_provider_, msaa_sample_count_); - } else { - RasterizeSource( - raster_source, resource_has_previous_content, mailbox, texture_target, - texture_is_overlay_candidate, texture_storage_allocated, resource_size, - resource_format, color_space, raster_full_rect, playback_rect, - transform, playback_settings, worker_context_provider_, - msaa_sample_count_, - ShouldUnpremultiplyAndDitherResource(resource_format), max_tile_size_); + if (measure_raster_metric) { + // Use a query to time the GPU side work for rasterizing this tile. 
+ ri->GenQueriesEXT(1, &query->query_id); + ri->BeginQueryEXT(GL_COMMANDS_ISSUED_CHROMIUM, query->query_id); + } + + { + base::Optional<base::ElapsedTimer> timer; + if (measure_raster_metric) + timer.emplace(); + if (enable_oop_rasterization_) { + RasterizeSourceOOP(raster_source, resource_has_previous_content, mailbox, + sync_token, texture_target, + texture_is_overlay_candidate, resource_size, + resource_format, color_space, raster_full_rect, + playback_rect, transform, playback_settings, + worker_context_provider_, msaa_sample_count_); + } else { + RasterizeSource(raster_source, resource_has_previous_content, mailbox, + sync_token, texture_target, texture_is_overlay_candidate, + resource_size, resource_format, color_space, + raster_full_rect, playback_rect, transform, + playback_settings, worker_context_provider_, + msaa_sample_count_, + ShouldUnpremultiplyAndDitherResource(resource_format), + max_tile_size_); + } + if (measure_raster_metric) + query->worker_duration = timer->Elapsed(); } + ri->EndQueryEXT(GL_COMMANDS_ISSUED_CHROMIUM); + // Generate sync token for cross context synchronization. 
return viz::ClientResourceProvider::GenerateSyncTokenHelper(ri); } @@ -532,4 +579,56 @@ bool GpuRasterBufferProvider::ShouldUnpremultiplyAndDitherResource( } } +#define UMA_HISTOGRAM_RASTER_TIME_CUSTOM_MICROSECONDS(name, total_time) \ + UMA_HISTOGRAM_CUSTOM_MICROSECONDS_TIMES( \ + name, total_time, base::TimeDelta::FromMicroseconds(1), \ + base::TimeDelta::FromMilliseconds(100), 100); + +bool GpuRasterBufferProvider::CheckRasterFinishedQueries() { + base::AutoLock hold(pending_raster_queries_lock_); + if (pending_raster_queries_.empty()) + return false; + + viz::RasterContextProvider::ScopedRasterContextLock scoped_context( + worker_context_provider_); + auto* ri = scoped_context.RasterInterface(); + + auto it = pending_raster_queries_.begin(); + while (it != pending_raster_queries_.end()) { + GLuint complete = 1; + ri->GetQueryObjectuivEXT(it->query_id, + GL_QUERY_RESULT_AVAILABLE_NO_FLUSH_CHROMIUM_EXT, + &complete); + if (!complete) + break; + + GLuint gpu_duration = 0u; + ri->GetQueryObjectuivEXT(it->query_id, GL_QUERY_RESULT_EXT, &gpu_duration); + ri->DeleteQueriesEXT(1, &it->query_id); + + base::TimeDelta total_time = + it->worker_duration + base::TimeDelta::FromMicroseconds(gpu_duration); + + // It is safe to use the UMA macros here with runtime generated strings + // because the client name should be initialized once in the process, before + // recording any metrics here. 
+ const char* client_name = GetClientNameForMetrics(); + if (enable_oop_rasterization_) { + UMA_HISTOGRAM_RASTER_TIME_CUSTOM_MICROSECONDS( + base::StringPrintf("Renderer4.%s.RasterTaskTotalDuration.Oop", + client_name), + total_time); + } else { + UMA_HISTOGRAM_RASTER_TIME_CUSTOM_MICROSECONDS( + base::StringPrintf("Renderer4.%s.RasterTaskTotalDuration.Gpu", + client_name), + total_time); + } + + it = pending_raster_queries_.erase(it); + } + + return pending_raster_queries_.size() > 0u; +} + } // namespace cc diff --git a/chromium/cc/raster/gpu_raster_buffer_provider.h b/chromium/cc/raster/gpu_raster_buffer_provider.h index 54c4fc9db4b..0ef41dd0bea 100644 --- a/chromium/cc/raster/gpu_raster_buffer_provider.h +++ b/chromium/cc/raster/gpu_raster_buffer_provider.h @@ -6,11 +6,18 @@ #define CC_RASTER_GPU_RASTER_BUFFER_PROVIDER_H_ #include <stdint.h> +#include <random> #include "base/macros.h" #include "cc/raster/raster_buffer_provider.h" #include "gpu/command_buffer/common/sync_token.h" +namespace gpu { +namespace raster { +class RasterInterface; +} // namespace raster +} // namespace gpu + namespace viz { class ContextProvider; class RasterContextProvider; @@ -20,6 +27,7 @@ namespace cc { class CC_EXPORT GpuRasterBufferProvider : public RasterBufferProvider { public: + static constexpr int kRasterMetricFrequency = 100; GpuRasterBufferProvider(viz::ContextProvider* compositor_context_provider, viz::RasterContextProvider* worker_context_provider, bool use_gpu_memory_buffer_resources, @@ -27,7 +35,8 @@ class CC_EXPORT GpuRasterBufferProvider : public RasterBufferProvider { viz::ResourceFormat tile_format, const gfx::Size& max_tile_size, bool unpremultiply_and_dither_low_bit_depth_tiles, - bool enable_oop_rasterization); + bool enable_oop_rasterization, + int raster_metric_frequency = kRasterMetricFrequency); ~GpuRasterBufferProvider() override; // Overridden from RasterBufferProvider: @@ -47,12 +56,12 @@ class CC_EXPORT GpuRasterBufferProvider : public 
RasterBufferProvider { const base::Closure& callback, uint64_t pending_callback_id) const override; void Shutdown() override; + bool CheckRasterFinishedQueries() override; gpu::SyncToken PlaybackOnWorkerThread( - const gpu::Mailbox& mailbox, + gpu::Mailbox* mailbox, GLenum texture_target, bool texture_is_overlay_candidate, - bool texture_storage_allocated, const gpu::SyncToken& sync_token, const gfx::Size& resource_size, viz::ResourceFormat resource_format, @@ -97,11 +106,10 @@ class CC_EXPORT GpuRasterBufferProvider : public RasterBufferProvider { const gfx::ColorSpace color_space_; const bool resource_has_previous_content_; const gpu::SyncToken before_raster_sync_token_; - const gpu::Mailbox mailbox_; const GLenum texture_target_; const bool texture_is_overlay_candidate_; - // Set to true once allocation is done in the worker thread. - bool texture_storage_allocated_; + + gpu::Mailbox mailbox_; // A SyncToken to be returned from the worker thread, and waited on before // using the rastered resource. gpu::SyncToken after_raster_sync_token_; @@ -109,7 +117,32 @@ class CC_EXPORT GpuRasterBufferProvider : public RasterBufferProvider { DISALLOW_COPY_AND_ASSIGN(RasterBufferImpl); }; + struct PendingRasterQuery { + // The id for querying the duration in executing the GPU side work. + GLuint query_id = 0u; + + // The duration for executing the work on the raster worker thread. 
+ base::TimeDelta worker_duration; + }; + bool ShouldUnpremultiplyAndDitherResource(viz::ResourceFormat format) const; + gpu::SyncToken PlaybackOnWorkerThreadInternal( + gpu::Mailbox* mailbox, + GLenum texture_target, + bool texture_is_overlay_candidate, + const gpu::SyncToken& sync_token, + const gfx::Size& resource_size, + viz::ResourceFormat resource_format, + const gfx::ColorSpace& color_space, + bool resource_has_previous_content, + const RasterSource* raster_source, + const gfx::Rect& raster_full_rect, + const gfx::Rect& raster_dirty_rect, + uint64_t new_content_id, + const gfx::AxisTransform2d& transform, + const RasterSource::PlaybackSettings& playback_settings, + const GURL& url, + PendingRasterQuery* query); viz::ContextProvider* const compositor_context_provider_; viz::RasterContextProvider* const worker_context_provider_; @@ -119,6 +152,17 @@ class CC_EXPORT GpuRasterBufferProvider : public RasterBufferProvider { const gfx::Size max_tile_size_; const bool unpremultiply_and_dither_low_bit_depth_tiles_; const bool enable_oop_rasterization_; + const int raster_metric_frequency_; + + // Note that this lock should never be acquired while holding the raster + // context lock. + base::Lock pending_raster_queries_lock_; + base::circular_deque<PendingRasterQuery> pending_raster_queries_ + GUARDED_BY(pending_raster_queries_lock_); + + // Accessed with the worker context lock acquired. 
+ std::mt19937 random_generator_; + std::uniform_int_distribution<int> uniform_distribution_; DISALLOW_COPY_AND_ASSIGN(GpuRasterBufferProvider); }; diff --git a/chromium/cc/raster/one_copy_raster_buffer_provider.cc b/chromium/cc/raster/one_copy_raster_buffer_provider.cc index bf284018663..562d2e21bfe 100644 --- a/chromium/cc/raster/one_copy_raster_buffer_provider.cc +++ b/chromium/cc/raster/one_copy_raster_buffer_provider.cc @@ -13,21 +13,23 @@ #include "base/debug/alias.h" #include "base/macros.h" #include "base/metrics/histogram_macros.h" +#include "base/strings/stringprintf.h" #include "base/trace_event/process_memory_dump.h" #include "base/trace_event/trace_event.h" #include "cc/base/histograms.h" #include "cc/base/math_util.h" #include "components/viz/common/gpu/context_provider.h" #include "components/viz/common/gpu/raster_context_provider.h" -#include "components/viz/common/gpu/texture_allocation.h" #include "components/viz/common/resources/platform_color.h" #include "components/viz/common/resources/resource_format.h" #include "components/viz/common/resources/resource_sizes.h" #include "gpu/GLES2/gl2extchromium.h" #include "gpu/command_buffer/client/context_support.h" -#include "gpu/command_buffer/client/gles2_interface.h" #include "gpu/command_buffer/client/gpu_memory_buffer_manager.h" #include "gpu/command_buffer/client/raster_interface.h" +#include "gpu/command_buffer/client/shared_image_interface.h" +#include "gpu/command_buffer/common/shared_image_trace_utils.h" +#include "gpu/command_buffer/common/shared_image_usage.h" #include "ui/gfx/buffer_format_util.h" #include "ui/gl/trace_util.h" @@ -46,13 +48,13 @@ class OneCopyRasterBufferProvider::OneCopyGpuBacking : public ResourcePool::GpuBacking { public: ~OneCopyGpuBacking() override { - gpu::gles2::GLES2Interface* gl = compositor_context_provider->ContextGL(); + if (mailbox.IsZero()) + return; + auto* sii = worker_context_provider->SharedImageInterface(); if (returned_sync_token.HasData()) - 
gl->WaitSyncTokenCHROMIUM(returned_sync_token.GetConstData()); - if (mailbox_sync_token.HasData()) - gl->WaitSyncTokenCHROMIUM(mailbox_sync_token.GetConstData()); - if (texture_id) - gl->DeleteTextures(1, &texture_id); + sii->DestroySharedImage(returned_sync_token, mailbox); + else if (mailbox_sync_token.HasData()) + sii->DestroySharedImage(mailbox_sync_token, mailbox); } void OnMemoryDump( @@ -60,23 +62,16 @@ class OneCopyRasterBufferProvider::OneCopyGpuBacking const base::trace_event::MemoryAllocatorDumpGuid& buffer_dump_guid, uint64_t tracing_process_id, int importance) const override { - if (!storage_allocated) + if (mailbox.IsZero()) return; - auto texture_tracing_guid = gl::GetGLTextureClientGUIDForTracing( - compositor_context_provider->ContextSupport()->ShareGroupTracingGUID(), - texture_id); - pmd->CreateSharedGlobalAllocatorDump(texture_tracing_guid); - pmd->AddOwnershipEdge(buffer_dump_guid, texture_tracing_guid, importance); + auto tracing_guid = gpu::GetSharedImageGUIDForTracing(mailbox); + pmd->CreateSharedGlobalAllocatorDump(tracing_guid); + pmd->AddOwnershipEdge(buffer_dump_guid, tracing_guid, importance); } - // The ContextProvider used to clean up the texture id. - viz::ContextProvider* compositor_context_provider = nullptr; - // The texture backing of the resource. - GLuint texture_id = 0; - // The allocation of storage for the |texture_id| is deferred, and this tracks - // if it has been done. 
- bool storage_allocated = false; + // The ContextProvider used to clean up the mailbox + viz::RasterContextProvider* worker_context_provider = nullptr; }; OneCopyRasterBufferProvider::RasterBufferImpl::RasterBufferImpl( @@ -94,8 +89,7 @@ OneCopyRasterBufferProvider::RasterBufferImpl::RasterBufferImpl( before_raster_sync_token_(backing->returned_sync_token), mailbox_(backing->mailbox), mailbox_texture_target_(backing->texture_target), - mailbox_texture_is_overlay_candidate_(backing->overlay_candidate), - mailbox_texture_storage_allocated_(backing->storage_allocated) {} + mailbox_texture_is_overlay_candidate_(backing->overlay_candidate) {} OneCopyRasterBufferProvider::RasterBufferImpl::~RasterBufferImpl() { // This SyncToken was created on the worker context after uploading the @@ -106,7 +100,7 @@ OneCopyRasterBufferProvider::RasterBufferImpl::~RasterBufferImpl() { // happened if the |after_raster_sync_token_| was set. backing_->returned_sync_token = gpu::SyncToken(); } - backing_->storage_allocated = mailbox_texture_storage_allocated_; + backing_->mailbox = mailbox_; } void OneCopyRasterBufferProvider::RasterBufferImpl::Playback( @@ -123,12 +117,10 @@ void OneCopyRasterBufferProvider::RasterBufferImpl::Playback( // returns another SyncToken generated on the worker thread to synchronize // with after the raster is complete. 
after_raster_sync_token_ = client_->PlaybackAndCopyOnWorkerThread( - mailbox_, mailbox_texture_target_, mailbox_texture_is_overlay_candidate_, - mailbox_texture_storage_allocated_, before_raster_sync_token_, - raster_source, raster_full_rect, raster_dirty_rect, transform, - resource_size_, resource_format_, color_space_, playback_settings, - previous_content_id_, new_content_id); - mailbox_texture_storage_allocated_ = true; + &mailbox_, mailbox_texture_target_, mailbox_texture_is_overlay_candidate_, + before_raster_sync_token_, raster_source, raster_full_rect, + raster_dirty_rect, transform, resource_size_, resource_format_, + color_space_, playback_settings, previous_content_id_, new_content_id); } OneCopyRasterBufferProvider::OneCopyRasterBufferProvider( @@ -159,6 +151,7 @@ OneCopyRasterBufferProvider::OneCopyRasterBufferProvider( max_staging_buffer_usage_in_bytes) { DCHECK(compositor_context_provider); DCHECK(worker_context_provider); + DCHECK(!IsResourceFormatCompressed(tile_format)); } OneCopyRasterBufferProvider::~OneCopyRasterBufferProvider() {} @@ -170,24 +163,10 @@ OneCopyRasterBufferProvider::AcquireBufferForRaster( uint64_t previous_content_id) { if (!resource.gpu_backing()) { auto backing = std::make_unique<OneCopyGpuBacking>(); - backing->compositor_context_provider = compositor_context_provider_; - - gpu::gles2::GLES2Interface* gl = compositor_context_provider_->ContextGL(); - const auto& caps = compositor_context_provider_->ContextCapabilities(); - - viz::TextureAllocation alloc = viz::TextureAllocation::MakeTextureId( - gl, caps, resource.format(), use_gpu_memory_buffer_resources_, - /*for_framebuffer_attachment=*/false); - backing->texture_id = alloc.texture_id; - backing->texture_target = alloc.texture_target; - backing->overlay_candidate = alloc.overlay_candidate; - gl->ProduceTextureDirectCHROMIUM(backing->texture_id, - backing->mailbox.name); - // Save a sync token in the backing so that we always wait on it even if - // this task is cancelled 
between being scheduled and running. - backing->returned_sync_token = - viz::ClientResourceProvider::GenerateSyncTokenHelper(gl); - + backing->worker_context_provider = worker_context_provider_; + backing->InitOverlayCandidateAndTextureTarget( + resource.format(), compositor_context_provider_->ContextCapabilities(), + use_gpu_memory_buffer_resources_); resource.set_gpu_backing(std::move(backing)); } OneCopyGpuBacking* backing = @@ -272,10 +251,9 @@ void OneCopyRasterBufferProvider::Shutdown() { } gpu::SyncToken OneCopyRasterBufferProvider::PlaybackAndCopyOnWorkerThread( - const gpu::Mailbox& mailbox, + gpu::Mailbox* mailbox, GLenum mailbox_texture_target, bool mailbox_texture_is_overlay_candidate, - bool mailbox_texture_storage_allocated, const gpu::SyncToken& sync_token, const RasterSource* raster_source, const gfx::Rect& raster_full_rect, @@ -299,8 +277,7 @@ gpu::SyncToken OneCopyRasterBufferProvider::PlaybackAndCopyOnWorkerThread( gpu::SyncToken sync_token_after_upload = CopyOnWorkerThread( staging_buffer.get(), raster_source, raster_full_rect, resource_format, resource_size, mailbox, mailbox_texture_target, - mailbox_texture_is_overlay_candidate, mailbox_texture_storage_allocated, - sync_token, color_space); + mailbox_texture_is_overlay_candidate, sync_token, color_space); staging_pool_.ReleaseStagingBuffer(std::move(staging_buffer)); return sync_token_after_upload; } @@ -384,10 +361,9 @@ gpu::SyncToken OneCopyRasterBufferProvider::CopyOnWorkerThread( const gfx::Rect& rect_to_copy, viz::ResourceFormat resource_format, const gfx::Size& resource_size, - const gpu::Mailbox& mailbox, + gpu::Mailbox* mailbox, GLenum mailbox_texture_target, bool mailbox_texture_is_overlay_candidate, - bool mailbox_texture_storage_allocated, const gpu::SyncToken& sync_token, const gfx::ColorSpace& color_space) { viz::RasterContextProvider::ScopedRasterContextLock scoped_context( @@ -395,21 +371,21 @@ gpu::SyncToken OneCopyRasterBufferProvider::CopyOnWorkerThread( 
gpu::raster::RasterInterface* ri = scoped_context.RasterInterface(); DCHECK(ri); - // Wait on the SyncToken that was created on the compositor thread after - // making the mailbox. This ensures that the mailbox we consume here is valid - // by the time the consume command executes. - ri->WaitSyncTokenCHROMIUM(sync_token.GetConstData()); + if (mailbox->IsZero()) { + auto* sii = worker_context_provider_->SharedImageInterface(); + uint32_t flags = gpu::SHARED_IMAGE_USAGE_RASTER; + if (mailbox_texture_is_overlay_candidate) + flags |= gpu::SHARED_IMAGE_USAGE_SCANOUT; + *mailbox = sii->CreateSharedImage(resource_format, resource_size, + color_space, flags); + ri->WaitSyncTokenCHROMIUM(sii->GenUnverifiedSyncToken().GetConstData()); + } else { + ri->WaitSyncTokenCHROMIUM(sync_token.GetConstData()); + } + GLuint mailbox_texture_id = ri->CreateAndConsumeTexture( mailbox_texture_is_overlay_candidate, gfx::BufferUsage::SCANOUT, - resource_format, mailbox.name); - - if (!mailbox_texture_storage_allocated) { - viz::TextureAllocation alloc = {mailbox_texture_id, mailbox_texture_target, - mailbox_texture_is_overlay_candidate}; - viz::TextureAllocation::AllocateStorage( - ri, worker_context_provider_->ContextCapabilities(), resource_format, - resource_size, alloc, color_space); - } + resource_format, mailbox->name); // Create and bind staging texture. if (!staging_buffer->texture_id) { @@ -446,61 +422,68 @@ gpu::SyncToken OneCopyRasterBufferProvider::CopyOnWorkerThread( // TODO(vmiura): Need a way to ensure we don't hold onto bindings? // ri->BindTexture(image_target, 0); + // Do not use queries unless COMMANDS_COMPLETED queries are supported, or + // COMMANDS_ISSUED queries are sufficient. 
+ GLenum query_target = GL_NONE; + if (worker_context_provider_->ContextCapabilities().sync_query) { - if (!staging_buffer->query_id) - ri->GenQueriesEXT(1, &staging_buffer->query_id); + // Use GL_COMMANDS_COMPLETED_CHROMIUM when supported because native + // GpuMemoryBuffers can be accessed by the GPU after commands are issued + // until GPU reads are done. + query_target = GL_COMMANDS_COMPLETED_CHROMIUM; + } #if defined(OS_CHROMEOS) && defined(ARCH_CPU_ARM_FAMILY) - // TODO(reveman): This avoids a performance problem on ARM ChromeOS - // devices. crbug.com/580166 - ri->BeginQueryEXT(GL_COMMANDS_ISSUED_CHROMIUM, staging_buffer->query_id); -#else - ri->BeginQueryEXT(GL_COMMANDS_COMPLETED_CHROMIUM, staging_buffer->query_id); + // TODO(reveman): This avoids a performance problem on ARM ChromeOS devices. + // https://crbug.com/580166 + query_target = GL_COMMANDS_ISSUED_CHROMIUM; #endif + + // COMMANDS_ISSUED is sufficient for shared memory GpuMemoryBuffers because + // they're uploaded using glTexImage2D (see gl::GLImageMemory::CopyTexImage). + const auto* buffer = staging_buffer->gpu_memory_buffer.get(); + if (buffer && + buffer->GetType() == gfx::GpuMemoryBufferType::SHARED_MEMORY_BUFFER) { + query_target = GL_COMMANDS_ISSUED_CHROMIUM; } - // Since compressed texture's cannot be pre-allocated we might have an - // unallocated resource in which case we need to perform a full size copy. - if (IsResourceFormatCompressed(staging_buffer->format)) { - ri->CompressedCopyTextureCHROMIUM(staging_buffer->texture_id, - mailbox_texture_id); - } else { - int bytes_per_row = viz::ResourceSizes::UncheckedWidthInBytes<int>( - rect_to_copy.width(), staging_buffer->format); - int chunk_size_in_rows = - std::max(1, max_bytes_per_copy_operation_ / bytes_per_row); - // Align chunk size to 4. Required to support compressed texture formats. 
- chunk_size_in_rows = MathUtil::UncheckedRoundUp(chunk_size_in_rows, 4); - int y = 0; - int height = rect_to_copy.height(); - while (y < height) { - // Copy at most |chunk_size_in_rows|. - int rows_to_copy = std::min(chunk_size_in_rows, height - y); - DCHECK_GT(rows_to_copy, 0); - - ri->CopySubTexture(staging_buffer->texture_id, mailbox_texture_id, 0, y, - 0, y, rect_to_copy.width(), rows_to_copy); - y += rows_to_copy; - - // Increment |bytes_scheduled_since_last_flush_| by the amount of memory - // used for this copy operation. - bytes_scheduled_since_last_flush_ += rows_to_copy * bytes_per_row; - - if (bytes_scheduled_since_last_flush_ >= max_bytes_per_copy_operation_) { - ri->ShallowFlushCHROMIUM(); - bytes_scheduled_since_last_flush_ = 0; - } - } + if (query_target != GL_NONE) { + if (!staging_buffer->query_id) + ri->GenQueriesEXT(1, &staging_buffer->query_id); + + ri->BeginQueryEXT(query_target, staging_buffer->query_id); } - if (worker_context_provider_->ContextCapabilities().sync_query) { -#if defined(OS_CHROMEOS) && defined(ARCH_CPU_ARM_FAMILY) - ri->EndQueryEXT(GL_COMMANDS_ISSUED_CHROMIUM); -#else - ri->EndQueryEXT(GL_COMMANDS_COMPLETED_CHROMIUM); -#endif + int bytes_per_row = viz::ResourceSizes::UncheckedWidthInBytes<int>( + rect_to_copy.width(), staging_buffer->format); + int chunk_size_in_rows = + std::max(1, max_bytes_per_copy_operation_ / bytes_per_row); + // Align chunk size to 4. Required to support compressed texture formats. + chunk_size_in_rows = MathUtil::UncheckedRoundUp(chunk_size_in_rows, 4); + int y = 0; + int height = rect_to_copy.height(); + while (y < height) { + // Copy at most |chunk_size_in_rows|. 
+ int rows_to_copy = std::min(chunk_size_in_rows, height - y); + DCHECK_GT(rows_to_copy, 0); + + ri->CopySubTexture(staging_buffer->texture_id, mailbox_texture_id, 0, y, 0, + y, rect_to_copy.width(), rows_to_copy); + y += rows_to_copy; + + // Increment |bytes_scheduled_since_last_flush_| by the amount of memory + // used for this copy operation. + bytes_scheduled_since_last_flush_ += rows_to_copy * bytes_per_row; + + if (bytes_scheduled_since_last_flush_ >= max_bytes_per_copy_operation_) { + ri->ShallowFlushCHROMIUM(); + bytes_scheduled_since_last_flush_ = 0; + } } + if (query_target != GL_NONE) + ri->EndQueryEXT(query_target); + ri->DeleteTextures(1, &mailbox_texture_id); // Generate sync token on the worker context that will be sent to and waited @@ -514,4 +497,8 @@ gfx::BufferUsage OneCopyRasterBufferProvider::StagingBufferUsage() const { : gfx::BufferUsage::GPU_READ_CPU_READ_WRITE; } +bool OneCopyRasterBufferProvider::CheckRasterFinishedQueries() { + return false; +} + } // namespace cc diff --git a/chromium/cc/raster/one_copy_raster_buffer_provider.h b/chromium/cc/raster/one_copy_raster_buffer_provider.h index e23dc8ddb88..6766cae7a40 100644 --- a/chromium/cc/raster/one_copy_raster_buffer_provider.h +++ b/chromium/cc/raster/one_copy_raster_buffer_provider.h @@ -58,13 +58,13 @@ class CC_EXPORT OneCopyRasterBufferProvider : public RasterBufferProvider { const base::Closure& callback, uint64_t pending_callback_id) const override; void Shutdown() override; + bool CheckRasterFinishedQueries() override; // Playback raster source and copy result into |resource|. 
gpu::SyncToken PlaybackAndCopyOnWorkerThread( - const gpu::Mailbox& mailbox, + gpu::Mailbox* mailbox, GLenum mailbox_texture_target, bool mailbox_texture_is_overlay_candidate, - bool mailbox_texture_storage_allocated, const gpu::SyncToken& sync_token, const RasterSource* raster_source, const gfx::Rect& raster_full_rect, @@ -109,11 +109,9 @@ class CC_EXPORT OneCopyRasterBufferProvider : public RasterBufferProvider { const gfx::ColorSpace color_space_; const uint64_t previous_content_id_; const gpu::SyncToken before_raster_sync_token_; - const gpu::Mailbox mailbox_; + gpu::Mailbox mailbox_; const GLenum mailbox_texture_target_; const bool mailbox_texture_is_overlay_candidate_; - // Set to true once allocation is done in the worker thread. - bool mailbox_texture_storage_allocated_; // A SyncToken to be returned from the worker thread, and waited on before // using the rastered resource. gpu::SyncToken after_raster_sync_token_; @@ -137,10 +135,9 @@ class CC_EXPORT OneCopyRasterBufferProvider : public RasterBufferProvider { const gfx::Rect& rect_to_copy, viz::ResourceFormat resource_format, const gfx::Size& resource_size, - const gpu::Mailbox& mailbox, + gpu::Mailbox* mailbox, GLenum mailbox_texture_target, bool mailbox_texture_is_overlay_candidate, - bool mailbox_texture_storage_allocated, const gpu::SyncToken& sync_token, const gfx::ColorSpace& color_space); gfx::BufferUsage StagingBufferUsage() const; diff --git a/chromium/cc/raster/raster_buffer_provider.cc b/chromium/cc/raster/raster_buffer_provider.cc index 57f2a6d01f3..991688679f7 100644 --- a/chromium/cc/raster/raster_buffer_provider.cc +++ b/chromium/cc/raster/raster_buffer_provider.cc @@ -8,7 +8,6 @@ #include "base/trace_event/trace_event.h" #include "cc/raster/raster_source.h" -#include "cc/raster/texture_compressor.h" #include "components/viz/common/resources/platform_color.h" #include "components/viz/common/resources/resource_format_utils.h" #include "third_party/skia/include/core/SkCanvas.h" @@ -29,11 
+28,11 @@ bool IsSupportedPlaybackToMemoryFormat(viz::ResourceFormat format) { case viz::RGBA_4444: case viz::RGBA_8888: case viz::BGRA_8888: - case viz::ETC1: return true; case viz::ALPHA_8: case viz::LUMINANCE_8: case viz::RGB_565: + case viz::ETC1: case viz::RED_8: case viz::LUMINANCE_F16: case viz::RGBA_F16: @@ -115,27 +114,12 @@ void RasterBufferProvider::PlaybackToMemory( surface->getCanvas(), target_color_space, content_size, canvas_bitmap_rect, canvas_bitmap_rect, transform, playback_settings); - if (format == viz::ETC1) { - TRACE_EVENT0("cc", - "RasterBufferProvider::PlaybackToMemory::CompressETC1"); - DCHECK_EQ(size.width() % 4, 0); - DCHECK_EQ(size.height() % 4, 0); - std::unique_ptr<TextureCompressor> texture_compressor = - TextureCompressor::Create(TextureCompressor::kFormatETC1); - SkPixmap pixmap; - surface->peekPixels(&pixmap); - texture_compressor->Compress( - reinterpret_cast<const uint8_t*>(pixmap.addr()), - reinterpret_cast<uint8_t*>(memory), size.width(), size.height(), - TextureCompressor::kQualityHigh); - } else { - TRACE_EVENT0("cc", - "RasterBufferProvider::PlaybackToMemory::ConvertRGBA4444"); - SkImageInfo dst_info = info.makeColorType( - ResourceFormatToClosestSkColorType(gpu_compositing, format)); - bool rv = surface->readPixels(dst_info, memory, stride, 0, 0); - DCHECK(rv); - } + TRACE_EVENT0("cc", + "RasterBufferProvider::PlaybackToMemory::ConvertRGBA4444"); + SkImageInfo dst_info = info.makeColorType( + ResourceFormatToClosestSkColorType(gpu_compositing, format)); + bool rv = surface->readPixels(dst_info, memory, stride, 0, 0); + DCHECK(rv); return; } case viz::ETC1: diff --git a/chromium/cc/raster/raster_buffer_provider.h b/chromium/cc/raster/raster_buffer_provider.h index 3e2b1aaeb50..74354c7aceb 100644 --- a/chromium/cc/raster/raster_buffer_provider.h +++ b/chromium/cc/raster/raster_buffer_provider.h @@ -88,6 +88,14 @@ class CC_EXPORT RasterBufferProvider { // Shutdown for doing cleanup. 
virtual void Shutdown() = 0; + + // Checks whether GPU side queries issued for previous raster work have been + // finished. Note that this will acquire the worker context lock so it can be + // used from any thread. But usage from the compositor thread should be + // avoided to prevent contention with worker threads. + // Returns true if there are pending queries that could not be completed in + // this check. + virtual bool CheckRasterFinishedQueries() = 0; }; } // namespace cc diff --git a/chromium/cc/raster/raster_buffer_provider_perftest.cc b/chromium/cc/raster/raster_buffer_provider_perftest.cc index 36db4d2c575..bbdd6f57bda 100644 --- a/chromium/cc/raster/raster_buffer_provider_perftest.cc +++ b/chromium/cc/raster/raster_buffer_provider_perftest.cc @@ -117,6 +117,12 @@ class PerfContextProvider } return test_context_provider_->GrContext(); } + gpu::SharedImageInterface* SharedImageInterface() override { + if (!test_context_provider_) { + test_context_provider_ = viz::TestContextProvider::Create(); + } + return test_context_provider_->SharedImageInterface(); + } viz::ContextCacheController* CacheController() override { return &cache_controller_; } @@ -305,7 +311,7 @@ class RasterBufferProviderPerfTestBase { for (auto& decode_task : raster_task->dependencies()) { // Add decode task if it doesn't already exist in graph. 
- TaskGraph::Node::Vector::iterator decode_it = + auto decode_it = std::find_if(graph->nodes.begin(), graph->nodes.end(), [decode_task](const TaskGraph::Node& node) { return node.task == decode_task; diff --git a/chromium/cc/raster/raster_buffer_provider_unittest.cc b/chromium/cc/raster/raster_buffer_provider_unittest.cc index cbe334d7eab..0cbd9467744 100644 --- a/chromium/cc/raster/raster_buffer_provider_unittest.cc +++ b/chromium/cc/raster/raster_buffer_provider_unittest.cc @@ -16,6 +16,7 @@ #include "base/macros.h" #include "base/run_loop.h" #include "base/single_thread_task_runner.h" +#include "base/test/metrics/histogram_tester.h" #include "base/threading/thread_task_runner_handle.h" #include "cc/base/unique_notifier.h" #include "cc/paint/draw_image.h" @@ -171,7 +172,7 @@ class RasterBufferProviderTest Create3dResourceProvider(); raster_buffer_provider_ = std::make_unique<GpuRasterBufferProvider>( context_provider_.get(), worker_context_provider_.get(), false, 0, - viz::RGBA_8888, gfx::Size(), true, false); + viz::RGBA_8888, gfx::Size(), true, false, 1); break; case RASTER_BUFFER_PROVIDER_TYPE_BITMAP: CreateSoftwareResourceProvider(); @@ -487,6 +488,37 @@ TEST_P(RasterBufferProviderTest, WaitOnSyncTokenAfterReschedulingTask) { EXPECT_FALSE(completed_tasks()[1].canceled); } +TEST_P(RasterBufferProviderTest, MeasureGpuRasterDuration) { + if (GetParam() != RASTER_BUFFER_PROVIDER_TYPE_GPU) + return; + + // Schedule a task. + AppendTask(0u); + ScheduleTasks(); + RunMessageLoopUntilAllTasksHaveCompleted(); + + // Wait for the GPU side work to finish. 
+ base::RunLoop run_loop; + std::vector<const ResourcePool::InUsePoolResource*> array; + for (const auto& resource : resources_) + array.push_back(&resource); + uint64_t callback_id = raster_buffer_provider_->SetReadyToDrawCallback( + array, + base::Bind([](base::RunLoop* run_loop) { run_loop->Quit(); }, &run_loop), + 0); + ASSERT_TRUE(callback_id); + run_loop.Run(); + + // Poll the task and make sure a histogram is logged. + base::HistogramTester histogram_tester; + std::string histogram("Renderer4.Renderer.RasterTaskTotalDuration.Gpu"); + histogram_tester.ExpectTotalCount(histogram, 0); + bool has_pending_queries = + raster_buffer_provider_->CheckRasterFinishedQueries(); + EXPECT_FALSE(has_pending_queries); + histogram_tester.ExpectTotalCount(histogram, 1); +} + INSTANTIATE_TEST_CASE_P( RasterBufferProviderTests, RasterBufferProviderTest, diff --git a/chromium/cc/raster/raster_source.cc b/chromium/cc/raster/raster_source.cc index be0cb4fbb3f..8f510fadc4c 100644 --- a/chromium/cc/raster/raster_source.cc +++ b/chromium/cc/raster/raster_source.cc @@ -94,16 +94,16 @@ void RasterSource::ClearForOpaqueRaster( if (device_column.intersect(playback_device_rect)) { clear_type = RasterSourceClearType::kBorder; raster_canvas->save(); - raster_canvas->clipRect(SkRect::MakeFromIRect(device_column), - SkClipOp::kIntersect, false); + raster_canvas->clipRect(SkRect::Make(device_column), SkClipOp::kIntersect, + false); raster_canvas->drawColor(background_color_, SkBlendMode::kSrc); raster_canvas->restore(); } if (device_row.intersect(playback_device_rect)) { clear_type = RasterSourceClearType::kBorder; raster_canvas->save(); - raster_canvas->clipRect(SkRect::MakeFromIRect(device_row), - SkClipOp::kIntersect, false); + raster_canvas->clipRect(SkRect::Make(device_row), SkClipOp::kIntersect, + false); raster_canvas->drawColor(background_color_, SkBlendMode::kSrc); raster_canvas->restore(); } @@ -154,7 +154,7 @@ void RasterSource::PlaybackToCanvas( raster_canvas->save(); 
raster_canvas->translate(-canvas_bitmap_rect.x(), -canvas_bitmap_rect.y()); - raster_canvas->clipRect(SkRect::MakeFromIRect(raster_bounds)); + raster_canvas->clipRect(SkRect::Make(raster_bounds)); raster_canvas->translate(raster_transform.translation().x(), raster_transform.translation().y()); raster_canvas->scale(raster_transform.scale() / recording_scale_factor_, diff --git a/chromium/cc/raster/staging_buffer_pool.cc b/chromium/cc/raster/staging_buffer_pool.cc index eada8b18943..a24fe9a16fb 100644 --- a/chromium/cc/raster/staging_buffer_pool.cc +++ b/chromium/cc/raster/staging_buffer_pool.cc @@ -6,7 +6,6 @@ #include <memory> -#include "base/memory/memory_coordinator_client_registry.h" #include "base/strings/stringprintf.h" #include "base/threading/thread_task_runner_handle.h" #include "base/trace_event/memory_dump_manager.h" @@ -35,14 +34,16 @@ const int kMaxCheckForQueryResultAvailableAttempts = 256; // Delay before a staging buffer might be released. const int kStagingBufferExpirationDelayMs = 1000; -bool CheckForQueryResult(gpu::raster::RasterInterface* ri, unsigned query_id) { - unsigned complete = 1; +bool CheckForQueryResult(gpu::raster::RasterInterface* ri, GLuint query_id) { + DCHECK(query_id); + GLuint complete = 1; ri->GetQueryObjectuivEXT(query_id, GL_QUERY_RESULT_AVAILABLE_EXT, &complete); return !!complete; } -void WaitForQueryResult(gpu::raster::RasterInterface* ri, unsigned query_id) { +void WaitForQueryResult(gpu::raster::RasterInterface* ri, GLuint query_id) { TRACE_EVENT0("cc", "WaitForQueryResult"); + DCHECK(query_id); int attempts_left = kMaxCheckForQueryResultAvailableAttempts; while (attempts_left--) { @@ -57,19 +58,14 @@ void WaitForQueryResult(gpu::raster::RasterInterface* ri, unsigned query_id) { kCheckForQueryResultAvailableTickRateMs)); } - unsigned result = 0; + GLuint result = 0; ri->GetQueryObjectuivEXT(query_id, GL_QUERY_RESULT_EXT, &result); } } // namespace StagingBuffer::StagingBuffer(const gfx::Size& size, viz::ResourceFormat 
format) - : size(size), - format(format), - texture_id(0), - image_id(0), - query_id(0), - content_id(0) {} + : size(size), format(format) {} StagingBuffer::~StagingBuffer() { DCHECK_EQ(texture_id, 0u); @@ -139,7 +135,6 @@ StagingBufferPool::StagingBufferPool( base::trace_event::MemoryDumpManager::GetInstance()->RegisterDumpProvider( this, "cc::StagingBufferPool", base::ThreadTaskRunnerHandle::Get()); - base::MemoryCoordinatorClientRegistry::GetInstance()->Register(this); memory_pressure_listener_.reset(new base::MemoryPressureListener( base::BindRepeating(&StagingBufferPool::OnMemoryPressure, weak_ptr_factory_.GetWeakPtr()))); @@ -149,7 +144,6 @@ StagingBufferPool::StagingBufferPool( } StagingBufferPool::~StagingBufferPool() { - base::MemoryCoordinatorClientRegistry::GetInstance()->Unregister(this); base::trace_event::MemoryDumpManager::GetInstance()->UnregisterDumpProvider( this); } @@ -257,14 +251,15 @@ std::unique_ptr<StagingBuffer> StagingBufferPool::AcquireStagingBuffer( DCHECK(ri); // Check if any busy buffers have become available. - if (worker_context_provider_->ContextCapabilities().sync_query) { - while (!busy_buffers_.empty()) { - if (!CheckForQueryResult(ri, busy_buffers_.front()->query_id)) - break; + while (!busy_buffers_.empty()) { + // Early out if query isn't used, or if query isn't complete yet. Query is + // created in OneCopyRasterBufferProvider::CopyOnWorkerThread(). + if (!busy_buffers_.front()->query_id || + !CheckForQueryResult(ri, busy_buffers_.front()->query_id)) + break; - MarkStagingBufferAsFree(busy_buffers_.front().get()); - free_buffers_.push_back(PopFront(&busy_buffers_)); - } + MarkStagingBufferAsFree(busy_buffers_.front().get()); + free_buffers_.push_back(PopFront(&busy_buffers_)); } // Wait for memory usage of non-free buffers to become less than the limit. 
@@ -275,12 +270,12 @@ std::unique_ptr<StagingBuffer> StagingBufferPool::AcquireStagingBuffer( if (busy_buffers_.empty()) break; - if (worker_context_provider_->ContextCapabilities().sync_query) { + if (busy_buffers_.front()->query_id) { WaitForQueryResult(ri, busy_buffers_.front()->query_id); MarkStagingBufferAsFree(busy_buffers_.front().get()); free_buffers_.push_back(PopFront(&busy_buffers_)); } else { - // Fall-back to glFinish if CHROMIUM_sync_query is not available. + // Fall back to glFinish if query isn't used. ri->Finish(); while (!busy_buffers_.empty()) { MarkStagingBufferAsFree(busy_buffers_.front().get()); @@ -424,12 +419,6 @@ void StagingBufferPool::ReleaseBuffersNotUsedSince(base::TimeTicks time) { } } -void StagingBufferPool::OnPurgeMemory() { - base::AutoLock lock(lock_); - // Release all buffers, regardless of how recently they were used. - ReleaseBuffersNotUsedSince(base::TimeTicks() + base::TimeDelta::Max()); -} - void StagingBufferPool::OnMemoryPressure( base::MemoryPressureListener::MemoryPressureLevel level) { base::AutoLock lock(lock_); @@ -438,6 +427,7 @@ void StagingBufferPool::OnMemoryPressure( case base::MemoryPressureListener::MEMORY_PRESSURE_LEVEL_MODERATE: break; case base::MemoryPressureListener::MEMORY_PRESSURE_LEVEL_CRITICAL: + // Release all buffers, regardless of how recently they were used. 
ReleaseBuffersNotUsedSince(base::TimeTicks() + base::TimeDelta::Max()); break; } diff --git a/chromium/cc/raster/staging_buffer_pool.h b/chromium/cc/raster/staging_buffer_pool.h index 881c34e0acd..687b0066588 100644 --- a/chromium/cc/raster/staging_buffer_pool.h +++ b/chromium/cc/raster/staging_buffer_pool.h @@ -12,7 +12,6 @@ #include "base/containers/circular_deque.h" #include "base/macros.h" -#include "base/memory/memory_coordinator_client.h" #include "base/memory/memory_pressure_listener.h" #include "base/memory/weak_ptr.h" #include "base/sequenced_task_runner.h" @@ -22,6 +21,7 @@ #include "base/trace_event/trace_event.h" #include "cc/cc_export.h" #include "components/viz/common/resources/resource_format.h" +#include "gpu/command_buffer/common/gl2_types.h" #include "ui/gfx/geometry/size.h" #include "ui/gfx/gpu_memory_buffer.h" @@ -51,17 +51,31 @@ struct StagingBuffer { const gfx::Size size; const viz::ResourceFormat format; - std::unique_ptr<gfx::GpuMemoryBuffer> gpu_memory_buffer; base::TimeTicks last_usage; - unsigned texture_id; - unsigned image_id; - unsigned query_id; - uint64_t content_id; + + // The following fields are initialized by OneCopyRasterBufferProvider. + // Storage for the staging buffer. This can be a GPU native or shared memory + // GpuMemoryBuffer. + std::unique_ptr<gfx::GpuMemoryBuffer> gpu_memory_buffer; + + // Id for image used to import the GpuMemoryBuffer to command buffer. + GLuint image_id = 0; + + // Id for texture that's bound to the GpuMemoryBuffer image. + GLuint texture_id = 0; + + // Id of command buffer query that tracks use of this staging buffer by the + // GPU. In general, GPU synchronization is necessary for native + // GpuMemoryBuffers. + GLuint query_id = 0; + + // Id of the content that's rastered into this staging buffer. Used to + // retrieve staging buffer with known content for reuse for partial raster. 
+ uint64_t content_id = 0; }; class CC_EXPORT StagingBufferPool - : public base::trace_event::MemoryDumpProvider, - public base::MemoryCoordinatorClient { + : public base::trace_event::MemoryDumpProvider { public: ~StagingBufferPool() final; @@ -98,11 +112,6 @@ class CC_EXPORT StagingBufferPool void StagingStateAsValueInto( base::trace_event::TracedValue* staging_state) const; - // Overriden from base::MemoryCoordinatorClient. - void OnPurgeMemory() override; - - // TODO(gyuyoung): OnMemoryPressure is deprecated. So this should be removed - // when the memory coordinator is enabled by default. void OnMemoryPressure( base::MemoryPressureListener::MemoryPressureLevel level); diff --git a/chromium/cc/raster/staging_buffer_pool_unittest.cc b/chromium/cc/raster/staging_buffer_pool_unittest.cc index 7c7c79aec61..5fc5dac4d2c 100644 --- a/chromium/cc/raster/staging_buffer_pool_unittest.cc +++ b/chromium/cc/raster/staging_buffer_pool_unittest.cc @@ -4,8 +4,6 @@ #include "cc/raster/staging_buffer_pool.h" -#include "base/memory/memory_coordinator_client.h" -#include "base/memory/memory_coordinator_client_registry.h" #include "base/run_loop.h" #include "base/test/scoped_task_environment.h" #include "base/threading/thread_task_runner_handle.h" @@ -37,9 +35,10 @@ TEST(StagingBufferPoolTest, ShutdownImmediatelyAfterCreation) { flush_message_loop(); // Now, destroy the pool, and trigger a notification from the - // MemoryCoordinatorClientRegistry. + // MemoryPressureListener. pool = nullptr; - base::MemoryCoordinatorClientRegistry::GetInstance()->PurgeMemory(); + base::MemoryPressureListener::SimulatePressureNotification( + base::MemoryPressureListener::MEMORY_PRESSURE_LEVEL_CRITICAL); // Allow the callbacks in the observers to run. flush_message_loop(); // No crash. 
diff --git a/chromium/cc/raster/task_graph_work_queue.cc b/chromium/cc/raster/task_graph_work_queue.cc index 06b1f54e293..2d04f52e6aa 100644 --- a/chromium/cc/raster/task_graph_work_queue.cc +++ b/chromium/cc/raster/task_graph_work_queue.cc @@ -80,11 +80,11 @@ class DependentIterator { } while (graph_->edges[current_index_].task != task_); // Now find the node for the dependent of this edge. - TaskGraph::Node::Vector::iterator it = std::find_if( - graph_->nodes.begin(), graph_->nodes.end(), - [this](const TaskGraph::Node& node) { - return node.task == graph_->edges[current_index_].dependent; - }); + auto it = std::find_if(graph_->nodes.begin(), graph_->nodes.end(), + [this](const TaskGraph::Node& node) { + return node.task == + graph_->edges[current_index_].dependent; + }); DCHECK(it != graph_->nodes.end()); current_node_ = &(*it); @@ -152,11 +152,11 @@ void TaskGraphWorkQueue::ScheduleTasks(NamespaceToken token, TaskGraph* graph) { // Remove any old nodes that are associated with this task. The result is // that the old graph is left with all nodes not present in this graph, // which we use below to determine what tasks need to be canceled. - TaskGraph::Node::Vector::iterator old_it = std::find_if( - task_namespace.graph.nodes.begin(), task_namespace.graph.nodes.end(), - [&node](const TaskGraph::Node& other) { - return node.task == other.task; - }); + auto old_it = std::find_if(task_namespace.graph.nodes.begin(), + task_namespace.graph.nodes.end(), + [&node](const TaskGraph::Node& other) { + return node.task == other.task; + }); if (old_it != task_namespace.graph.nodes.end()) { std::swap(*old_it, task_namespace.graph.nodes.back()); // If old task is scheduled to run again and not yet started running, @@ -200,8 +200,7 @@ void TaskGraphWorkQueue::ScheduleTasks(NamespaceToken token, TaskGraph* graph) { task_namespace.graph.Swap(graph); // Determine what tasks in old graph need to be canceled. 
- for (TaskGraph::Node::Vector::iterator it = graph->nodes.begin(); - it != graph->nodes.end(); ++it) { + for (auto it = graph->nodes.begin(); it != graph->nodes.end(); ++it) { TaskGraph::Node& node = *it; // Skip if already finished running task. @@ -353,7 +352,7 @@ void TaskGraphWorkQueue::CompleteTask(PrioritizedTask completed_task) { void TaskGraphWorkQueue::CollectCompletedTasks(NamespaceToken token, Task::Vector* completed_tasks) { - TaskNamespaceMap::iterator it = namespaces_.find(token); + auto it = namespaces_.find(token); if (it == namespaces_.end()) return; diff --git a/chromium/cc/raster/task_graph_work_queue_unittest.cc b/chromium/cc/raster/task_graph_work_queue_unittest.cc index 123364ff813..93455db7e23 100644 --- a/chromium/cc/raster/task_graph_work_queue_unittest.cc +++ b/chromium/cc/raster/task_graph_work_queue_unittest.cc @@ -4,6 +4,7 @@ #include "cc/raster/task_graph_work_queue.h" +#include "build/build_config.h" #include "testing/gtest/include/gtest/gtest.h" namespace cc { @@ -58,5 +59,161 @@ TEST(TaskGraphWorkQueueTest, TestChangingDependency) { EXPECT_FALSE(work_queue.HasReadyToRunTasks()); } +// Tasks with same priority but in different category. +TEST(TaskGraphWorkQueueTest, TestTaskWithDifferentCategory) { + TaskGraphWorkQueue work_queue; + NamespaceToken token = work_queue.GenerateNamespaceToken(); + + // Create a graph where | task| has dependencies. 
+ TaskGraph graph; + scoped_refptr<FakeTaskImpl> task(new FakeTaskImpl()); + scoped_refptr<FakeTaskImpl> dependency_task1(new FakeTaskImpl()); + scoped_refptr<FakeTaskImpl> dependency_task2(new FakeTaskImpl()); + scoped_refptr<FakeTaskImpl> dependency_task3(new FakeTaskImpl()); + + graph.nodes.push_back(TaskGraph::Node(task.get(), 0u, 0u, 3u)); + graph.nodes.push_back(TaskGraph::Node(dependency_task1.get(), 0u, 0u, 0u)); + graph.nodes.push_back(TaskGraph::Node(dependency_task2.get(), 1u, 0u, 0u)); + graph.nodes.push_back(TaskGraph::Node(dependency_task3.get(), 2u, 0u, 0u)); + + graph.edges.push_back(TaskGraph::Edge(dependency_task1.get(), task.get())); + graph.edges.push_back(TaskGraph::Edge(dependency_task2.get(), task.get())); + graph.edges.push_back(TaskGraph::Edge(dependency_task3.get(), task.get())); + + // Schedule the graph. + work_queue.ScheduleTasks(token, &graph); + + // Run the |dependency_task1|from category 0. + TaskGraphWorkQueue::PrioritizedTask prioritized_dependency_task = + work_queue.GetNextTaskToRun(0u); + EXPECT_EQ(prioritized_dependency_task.task.get(), dependency_task1.get()); + work_queue.CompleteTask(std::move(prioritized_dependency_task)); + EXPECT_TRUE(work_queue.HasReadyToRunTasks()); + EXPECT_FALSE(work_queue.HasReadyToRunTasksForCategory(0u)); + EXPECT_TRUE(work_queue.HasReadyToRunTasksForCategory(1u)); + EXPECT_TRUE(work_queue.HasReadyToRunTasksForCategory(2u)); + + // Run the |dependency_task2|from category 1. + prioritized_dependency_task = work_queue.GetNextTaskToRun(1u); + EXPECT_EQ(prioritized_dependency_task.task.get(), dependency_task2.get()); + work_queue.CompleteTask(std::move(prioritized_dependency_task)); + EXPECT_TRUE(work_queue.HasReadyToRunTasks()); + EXPECT_FALSE(work_queue.HasReadyToRunTasksForCategory(0u)); + EXPECT_FALSE(work_queue.HasReadyToRunTasksForCategory(1u)); + EXPECT_TRUE(work_queue.HasReadyToRunTasksForCategory(2u)); + + // Run the |dependency_task3|from category 2. 
+ prioritized_dependency_task = work_queue.GetNextTaskToRun(2u); + EXPECT_EQ(prioritized_dependency_task.task.get(), dependency_task3.get()); + work_queue.CompleteTask(std::move(prioritized_dependency_task)); + EXPECT_TRUE(work_queue.HasReadyToRunTasks()); + // Once all dependencies from different category completed, | task| turns + // ready to run. + EXPECT_TRUE(work_queue.HasReadyToRunTasksForCategory(0u)); + EXPECT_FALSE(work_queue.HasReadyToRunTasksForCategory(1u)); + EXPECT_FALSE(work_queue.HasReadyToRunTasksForCategory(2u)); + + prioritized_dependency_task = work_queue.GetNextTaskToRun(0u); + EXPECT_EQ(prioritized_dependency_task.task.get(), task.get()); + work_queue.CompleteTask(std::move(prioritized_dependency_task)); + EXPECT_FALSE(work_queue.HasReadyToRunTasks()); +} + +// Tasks with different priority run in a priority order. But need to guarantee +// its dependences are completed. +TEST(TaskGraphWorkQueueTest, TestTaskWithDifferentPriority) { + TaskGraphWorkQueue work_queue; + NamespaceToken token = work_queue.GenerateNamespaceToken(); + { + // Create a graph where task has a dependency + TaskGraph graph; + scoped_refptr<FakeTaskImpl> task(new FakeTaskImpl()); + scoped_refptr<FakeTaskImpl> dependency_task1(new FakeTaskImpl()); + scoped_refptr<FakeTaskImpl> dependency_task2(new FakeTaskImpl()); + scoped_refptr<FakeTaskImpl> dependency_task3(new FakeTaskImpl()); + + // | task| has the lowest priority and 3 dependences, will run last. 
+ graph.nodes.push_back(TaskGraph::Node(task.get(), 0u, 2u, 3u)); + graph.nodes.push_back(TaskGraph::Node(dependency_task1.get(), 0u, 3u, 0u)); + graph.nodes.push_back(TaskGraph::Node(dependency_task2.get(), 0u, 2u, 0u)); + graph.nodes.push_back(TaskGraph::Node(dependency_task3.get(), 0u, 1u, 0u)); + + graph.edges.push_back(TaskGraph::Edge(dependency_task1.get(), task.get())); + graph.edges.push_back(TaskGraph::Edge(dependency_task2.get(), task.get())); + graph.edges.push_back(TaskGraph::Edge(dependency_task3.get(), task.get())); + + // Schedule the graph. + work_queue.ScheduleTasks(token, &graph); + + // Run the |dependency_task| + TaskGraphWorkQueue::PrioritizedTask prioritized_dependency_task = + work_queue.GetNextTaskToRun(0u); + EXPECT_EQ(prioritized_dependency_task.task.get(), dependency_task3.get()); + work_queue.CompleteTask(std::move(prioritized_dependency_task)); + EXPECT_TRUE(work_queue.HasReadyToRunTasks()); + + prioritized_dependency_task = work_queue.GetNextTaskToRun(0u); + EXPECT_EQ(prioritized_dependency_task.task.get(), dependency_task2.get()); + work_queue.CompleteTask(std::move(prioritized_dependency_task)); + EXPECT_TRUE(work_queue.HasReadyToRunTasks()); + + prioritized_dependency_task = work_queue.GetNextTaskToRun(0u); + EXPECT_EQ(prioritized_dependency_task.task.get(), dependency_task1.get()); + work_queue.CompleteTask(std::move(prioritized_dependency_task)); + EXPECT_TRUE(work_queue.HasReadyToRunTasks()); + + // | task| runs last. 
+ prioritized_dependency_task = work_queue.GetNextTaskToRun(0u); + EXPECT_EQ(prioritized_dependency_task.task.get(), task.get()); + work_queue.CompleteTask(std::move(prioritized_dependency_task)); + EXPECT_FALSE(work_queue.HasReadyToRunTasks()); + } + + { + // Create a graph where |task| has dependencies + TaskGraph graph; + scoped_refptr<FakeTaskImpl> task(new FakeTaskImpl()); + scoped_refptr<FakeTaskImpl> dependency_task1(new FakeTaskImpl()); + scoped_refptr<FakeTaskImpl> dependency_task2(new FakeTaskImpl()); + scoped_refptr<FakeTaskImpl> dependency_task3(new FakeTaskImpl()); + + // |task| has the highest priority but 3 dependencies; it still runs last. + graph.nodes.push_back(TaskGraph::Node(task.get(), 0u, 0u, 3u)); + graph.nodes.push_back(TaskGraph::Node(dependency_task1.get(), 0u, 3u, 0u)); + graph.nodes.push_back(TaskGraph::Node(dependency_task2.get(), 0u, 2u, 0u)); + graph.nodes.push_back(TaskGraph::Node(dependency_task3.get(), 0u, 1u, 0u)); + + graph.edges.push_back(TaskGraph::Edge(dependency_task1.get(), task.get())); + graph.edges.push_back(TaskGraph::Edge(dependency_task2.get(), task.get())); + graph.edges.push_back(TaskGraph::Edge(dependency_task3.get(), task.get())); + + // Schedule the graph. 
+ work_queue.ScheduleTasks(token, &graph); + + // Run the dependency tasks; they come out in priority order. + TaskGraphWorkQueue::PrioritizedTask prioritized_dependency_task = + work_queue.GetNextTaskToRun(0u); + EXPECT_EQ(prioritized_dependency_task.task.get(), dependency_task3.get()); + work_queue.CompleteTask(std::move(prioritized_dependency_task)); + EXPECT_TRUE(work_queue.HasReadyToRunTasks()); + + prioritized_dependency_task = work_queue.GetNextTaskToRun(0u); + EXPECT_EQ(prioritized_dependency_task.task.get(), dependency_task2.get()); + work_queue.CompleteTask(std::move(prioritized_dependency_task)); + EXPECT_TRUE(work_queue.HasReadyToRunTasks()); + + prioritized_dependency_task = work_queue.GetNextTaskToRun(0u); + EXPECT_EQ(prioritized_dependency_task.task.get(), dependency_task1.get()); + work_queue.CompleteTask(std::move(prioritized_dependency_task)); + EXPECT_TRUE(work_queue.HasReadyToRunTasks()); + + // |task| runs last. + prioritized_dependency_task = work_queue.GetNextTaskToRun(0u); + EXPECT_EQ(prioritized_dependency_task.task.get(), task.get()); + work_queue.CompleteTask(std::move(prioritized_dependency_task)); + EXPECT_FALSE(work_queue.HasReadyToRunTasks()); + } +} + } // namespace } // namespace cc diff --git a/chromium/cc/raster/texture_compressor.cc b/chromium/cc/raster/texture_compressor.cc deleted file mode 100644 index 6aabf6792df..00000000000 --- a/chromium/cc/raster/texture_compressor.cc +++ /dev/null @@ -1,35 +0,0 @@ -// Copyright 2015 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#include "cc/raster/texture_compressor.h" - -#include "base/logging.h" -#include "base/memory/ptr_util.h" -#include "cc/raster/texture_compressor_etc1.h" - -#if defined(ARCH_CPU_X86_FAMILY) -#include "base/cpu.h" -#include "cc/raster/texture_compressor_etc1_sse.h" -#endif - -namespace cc { - -std::unique_ptr<TextureCompressor> TextureCompressor::Create(Format format) { - switch (format) { - case kFormatETC1: { -#if defined(ARCH_CPU_X86_FAMILY) - base::CPU cpu; - if (cpu.has_sse2()) { - return base::WrapUnique(new TextureCompressorETC1SSE()); - } -#endif - return base::WrapUnique(new TextureCompressorETC1()); - } - } - - NOTREACHED(); - return nullptr; -} - -} // namespace cc diff --git a/chromium/cc/raster/texture_compressor.h b/chromium/cc/raster/texture_compressor.h deleted file mode 100644 index 709fc264516..00000000000 --- a/chromium/cc/raster/texture_compressor.h +++ /dev/null @@ -1,47 +0,0 @@ -// Copyright 2015 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. 
- -#ifndef CC_RASTER_TEXTURE_COMPRESSOR_H_ -#define CC_RASTER_TEXTURE_COMPRESSOR_H_ - -#include <stdint.h> - -#include <memory> - -#include "base/macros.h" -#include "cc/cc_export.h" - -namespace cc { - -class CC_EXPORT TextureCompressor { - public: - enum Format { - kFormatETC1, - }; - - enum Quality { - kQualityLow, - kQualityMedium, - kQualityHigh, - }; - - static std::unique_ptr<TextureCompressor> Create(Format format); - virtual ~TextureCompressor() {} - - virtual void Compress(const uint8_t* src, - uint8_t* dst, - int width, - int height, - Quality quality) = 0; - - protected: - TextureCompressor() {} - - private: - DISALLOW_COPY_AND_ASSIGN(TextureCompressor); -}; - -} // namespace cc - -#endif // CC_RASTER_TEXTURE_COMPRESSOR_H_ diff --git a/chromium/cc/raster/texture_compressor_etc1.cc b/chromium/cc/raster/texture_compressor_etc1.cc deleted file mode 100644 index 55b0ca4640c..00000000000 --- a/chromium/cc/raster/texture_compressor_etc1.cc +++ /dev/null @@ -1,333 +0,0 @@ -// Copyright 2015 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -// See the following specification for details on the ETC1 format: -// https://www.khronos.org/registry/gles/extensions/OES/OES_compressed_ETC1_RGB8_texture.txt - -#include "cc/raster/texture_compressor_etc1.h" - -#include <stdint.h> -#include <string.h> -#include <limits> - -#include "base/logging.h" - -// Defining the following macro will cause the error metric function to weigh -// each color channel differently depending on how the human eye can perceive -// them. This can give a slight improvement in image quality at the cost of a -// performance hit. -// #define USE_PERCEIVED_ERROR_METRIC - -namespace cc { - -namespace { - -// Constructs a color from a given base color and luminance value. 
-inline Color MakeColor(const Color& base, int16_t lum) { - int b = static_cast<int>(base.channels.b) + lum; - int g = static_cast<int>(base.channels.g) + lum; - int r = static_cast<int>(base.channels.r) + lum; - Color color; - color.channels.b = static_cast<uint8_t>(clamp(b, 0, 255)); - color.channels.g = static_cast<uint8_t>(clamp(g, 0, 255)); - color.channels.r = static_cast<uint8_t>(clamp(r, 0, 255)); - return color; -} - -// Calculates the error metric for two colors. A small error signals that the -// colors are similar to each other, a large error the signals the opposite. -inline uint32_t GetColorError(const Color& u, const Color& v) { -#ifdef USE_PERCEIVED_ERROR_METRIC - float delta_b = static_cast<float>(u.channels.b) - v.channels.b; - float delta_g = static_cast<float>(u.channels.g) - v.channels.g; - float delta_r = static_cast<float>(u.channels.r) - v.channels.r; - return static_cast<uint32_t>(0.299f * delta_b * delta_b + - 0.587f * delta_g * delta_g + - 0.114f * delta_r * delta_r); -#else - int delta_b = static_cast<int>(u.channels.b) - v.channels.b; - int delta_g = static_cast<int>(u.channels.g) - v.channels.g; - int delta_r = static_cast<int>(u.channels.r) - v.channels.r; - return delta_b * delta_b + delta_g * delta_g + delta_r * delta_r; -#endif -} - -void GetAverageColor(const Color* src, float* avg_color) { - uint32_t sum_b = 0, sum_g = 0, sum_r = 0; - - for (unsigned int i = 0; i < 8; ++i) { - sum_b += src[i].channels.b; - sum_g += src[i].channels.g; - sum_r += src[i].channels.r; - } - - const float kInv8 = 1.0f / 8.0f; - avg_color[0] = static_cast<float>(sum_b) * kInv8; - avg_color[1] = static_cast<float>(sum_g) * kInv8; - avg_color[2] = static_cast<float>(sum_r) * kInv8; -} - -void ComputeLuminance(uint8_t* block, - const Color* src, - const Color& base, - int sub_block_id, - const uint8_t* idx_to_num_tab) { - uint32_t best_tbl_err = std::numeric_limits<uint32_t>::max(); - uint8_t best_tbl_idx = 0; - uint8_t best_mod_idx[8][8]; // 
[table][texel] - - // Try all codeword tables to find the one giving the best results for this - // block. - for (unsigned int tbl_idx = 0; tbl_idx < 8; ++tbl_idx) { - // Pre-compute all the candidate colors; combinations of the base color and - // all available luminance values. - Color candidate_color[4]; // [modifier] - for (unsigned int mod_idx = 0; mod_idx < 4; ++mod_idx) { - int16_t lum = g_codeword_tables[tbl_idx][mod_idx]; - candidate_color[mod_idx] = MakeColor(base, lum); - } - - uint32_t tbl_err = 0; - - for (unsigned int i = 0; i < 8; ++i) { - // Try all modifiers in the current table to find which one gives the - // smallest error. - uint32_t best_mod_err = std::numeric_limits<uint32_t>::max(); - for (unsigned int mod_idx = 0; mod_idx < 4; ++mod_idx) { - const Color& color = candidate_color[mod_idx]; - - uint32_t mod_err = GetColorError(src[i], color); - if (mod_err < best_mod_err) { - best_mod_idx[tbl_idx][i] = mod_idx; - best_mod_err = mod_err; - - if (mod_err == 0) - break; // We cannot do any better than this. - } - } - - tbl_err += best_mod_err; - if (tbl_err > best_tbl_err) - break; // We're already doing worse than the best table so skip. - } - - if (tbl_err < best_tbl_err) { - best_tbl_err = tbl_err; - best_tbl_idx = tbl_idx; - - if (tbl_err == 0) - break; // We cannot do any better than this. - } - } - - WriteCodewordTable(block, sub_block_id, best_tbl_idx); - - uint32_t pix_data = 0; - - for (unsigned int i = 0; i < 8; ++i) { - uint8_t mod_idx = best_mod_idx[best_tbl_idx][i]; - uint8_t pix_idx = g_mod_to_pix[mod_idx]; - - uint32_t lsb = pix_idx & 0x1; - uint32_t msb = pix_idx >> 1; - - // Obtain the texel number as specified in the standard. - int texel_num = idx_to_num_tab[i]; - pix_data |= msb << (texel_num + 16); - pix_data |= lsb << (texel_num); - } - - WritePixelData(block, pix_data); -} - -/** - * Tries to compress the block under the assumption that it's a single color - * block. 
If it's not the function will bail out without writing anything to - * the destination buffer. - */ -bool TryCompressSolidBlock(uint8_t* dst, const Color* src) { - for (unsigned int i = 1; i < 16; ++i) { - if (src[i].bits != src[0].bits) - return false; - } - - // Clear destination buffer so that we can "or" in the results. - memset(dst, 0, 8); - - float src_color_float[3] = {static_cast<float>(src->channels.b), - static_cast<float>(src->channels.g), - static_cast<float>(src->channels.r)}; - Color base = MakeColor555(src_color_float); - - WriteDiff(dst, true); - WriteFlip(dst, false); - WriteColors555(dst, base, base); - - uint8_t best_tbl_idx = 0; - uint8_t best_mod_idx = 0; - uint32_t best_mod_err = std::numeric_limits<uint32_t>::max(); - - // Try all codeword tables to find the one giving the best results for this - // block. - for (unsigned int tbl_idx = 0; tbl_idx < 8; ++tbl_idx) { - // Try all modifiers in the current table to find which one gives the - // smallest error. - for (unsigned int mod_idx = 0; mod_idx < 4; ++mod_idx) { - int16_t lum = g_codeword_tables[tbl_idx][mod_idx]; - const Color& color = MakeColor(base, lum); - - uint32_t mod_err = GetColorError(*src, color); - if (mod_err < best_mod_err) { - best_tbl_idx = tbl_idx; - best_mod_idx = mod_idx; - best_mod_err = mod_err; - - if (mod_err == 0) - break; // We cannot do any better than this. - } - } - - if (best_mod_err == 0) - break; - } - - WriteCodewordTable(dst, 0, best_tbl_idx); - WriteCodewordTable(dst, 1, best_tbl_idx); - - uint8_t pix_idx = g_mod_to_pix[best_mod_idx]; - uint32_t lsb = pix_idx & 0x1; - uint32_t msb = pix_idx >> 1; - - uint32_t pix_data = 0; - for (unsigned int i = 0; i < 2; ++i) { - for (unsigned int j = 0; j < 8; ++j) { - // Obtain the texel number as specified in the standard. 
- int texel_num = g_idx_to_num[i][j]; - pix_data |= msb << (texel_num + 16); - pix_data |= lsb << (texel_num); - } - } - - WritePixelData(dst, pix_data); - return true; -} - -void CompressBlock(uint8_t* dst, const Color* ver_src, const Color* hor_src) { - if (TryCompressSolidBlock(dst, ver_src)) - return; - - const Color* sub_block_src[4] = {ver_src, ver_src + 8, hor_src, hor_src + 8}; - - Color sub_block_avg[4]; - bool use_differential[2] = {true, true}; - - // Compute the average color for each sub block and determine if differential - // coding can be used. - for (unsigned int i = 0, j = 1; i < 4; i += 2, j += 2) { - float avg_color_0[3]; - GetAverageColor(sub_block_src[i], avg_color_0); - Color avg_color_555_0 = MakeColor555(avg_color_0); - - float avg_color_1[3]; - GetAverageColor(sub_block_src[j], avg_color_1); - Color avg_color_555_1 = MakeColor555(avg_color_1); - - for (unsigned int light_idx = 0; light_idx < 3; ++light_idx) { - int u = avg_color_555_0.components[light_idx] >> 3; - int v = avg_color_555_1.components[light_idx] >> 3; - - int component_diff = v - u; - if (component_diff < -4 || component_diff > 3) { - use_differential[i / 2] = false; - sub_block_avg[i] = MakeColor444(avg_color_0); - sub_block_avg[j] = MakeColor444(avg_color_1); - } else { - sub_block_avg[i] = avg_color_555_0; - sub_block_avg[j] = avg_color_555_1; - } - } - } - - // Compute the error of each sub block before adjusting for luminance. These - // error values are later used for determining if we should flip the sub - // block or not. - uint32_t sub_block_err[4] = {0}; - for (unsigned int i = 0; i < 4; ++i) { - for (unsigned int j = 0; j < 8; ++j) { - sub_block_err[i] += GetColorError(sub_block_avg[i], sub_block_src[i][j]); - } - } - - bool flip = - sub_block_err[2] + sub_block_err[3] < sub_block_err[0] + sub_block_err[1]; - - // Clear destination buffer so that we can "or" in the results. 
- memset(dst, 0, 8); - - WriteDiff(dst, use_differential[!!flip]); - WriteFlip(dst, flip); - - uint8_t sub_block_off_0 = flip ? 2 : 0; - uint8_t sub_block_off_1 = sub_block_off_0 + 1; - - if (use_differential[!!flip]) { - WriteColors555(dst, sub_block_avg[sub_block_off_0], - sub_block_avg[sub_block_off_1]); - } else { - WriteColors444(dst, sub_block_avg[sub_block_off_0], - sub_block_avg[sub_block_off_1]); - } - - // Compute luminance for the first sub block. - ComputeLuminance(dst, sub_block_src[sub_block_off_0], - sub_block_avg[sub_block_off_0], 0, - g_idx_to_num[sub_block_off_0]); - // Compute luminance for the second sub block. - ComputeLuminance(dst, sub_block_src[sub_block_off_1], - sub_block_avg[sub_block_off_1], 1, - g_idx_to_num[sub_block_off_1]); -} - -} // namespace - -void TextureCompressorETC1::Compress(const uint8_t* src, - uint8_t* dst, - int width, - int height, - Quality quality) { - DCHECK_GE(width, 4); - DCHECK_EQ((width & 3), 0); - DCHECK_GE(height, 4); - DCHECK_EQ((height & 3), 0); - - Color ver_blocks[16]; - Color hor_blocks[16]; - - for (int y = 0; y < height; y += 4, src += width * 4 * 4) { - for (int x = 0; x < width; x += 4, dst += 8) { - const Color* row0 = reinterpret_cast<const Color*>(src + x * 4); - const Color* row1 = row0 + width; - const Color* row2 = row1 + width; - const Color* row3 = row2 + width; - - memcpy(ver_blocks, row0, 8); - memcpy(ver_blocks + 2, row1, 8); - memcpy(ver_blocks + 4, row2, 8); - memcpy(ver_blocks + 6, row3, 8); - memcpy(ver_blocks + 8, row0 + 2, 8); - memcpy(ver_blocks + 10, row1 + 2, 8); - memcpy(ver_blocks + 12, row2 + 2, 8); - memcpy(ver_blocks + 14, row3 + 2, 8); - - memcpy(hor_blocks, row0, 16); - memcpy(hor_blocks + 4, row1, 16); - memcpy(hor_blocks + 8, row2, 16); - memcpy(hor_blocks + 12, row3, 16); - - CompressBlock(dst, ver_blocks, hor_blocks); - } - } -} - -} // namespace cc diff --git a/chromium/cc/raster/texture_compressor_etc1.h b/chromium/cc/raster/texture_compressor_etc1.h deleted file mode 
100644 index 6e85313cab0..00000000000 --- a/chromium/cc/raster/texture_compressor_etc1.h +++ /dev/null @@ -1,204 +0,0 @@ -// Copyright 2015 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef CC_RASTER_TEXTURE_COMPRESSOR_ETC1_H_ -#define CC_RASTER_TEXTURE_COMPRESSOR_ETC1_H_ - -#include "cc/raster/texture_compressor.h" - -#include <stdint.h> - -#include "base/compiler_specific.h" -#include "base/logging.h" -#include "base/macros.h" - -namespace cc { - -template <typename T> -inline T clamp(T val, T min, T max) { - return val < min ? min : (val > max ? max : val); -} - -inline uint8_t round_to_5_bits(float val) { - return clamp<uint8_t>(val * 31.0f / 255.0f + 0.5f, 0, 31); -} - -inline uint8_t round_to_4_bits(float val) { - return clamp<uint8_t>(val * 15.0f / 255.0f + 0.5f, 0, 15); -} - -union Color { - struct BgraColorType { - uint8_t b; - uint8_t g; - uint8_t r; - uint8_t a; - } channels; - uint8_t components[4]; - uint32_t bits; -}; - -// Codeword tables. -// See: Table 3.17.2 -alignas(16) static const int16_t g_codeword_tables[8][4] = { - {-8, -2, 2, 8}, {-17, -5, 5, 17}, {-29, -9, 9, 29}, - {-42, -13, 13, 42}, {-60, -18, 18, 60}, {-80, -24, 24, 80}, - {-106, -33, 33, 106}, {-183, -47, 47, 183}}; - -// Maps modifier indices to pixel index values. 
-// See: Table 3.17.3 -static const uint8_t g_mod_to_pix[4] = {3, 2, 0, 1}; - -// The ETC1 specification index texels as follows: -// [a][e][i][m] [ 0][ 4][ 8][12] -// [b][f][j][n] <-> [ 1][ 5][ 9][13] -// [c][g][k][o] [ 2][ 6][10][14] -// [d][h][l][p] [ 3][ 7][11][15] - -// [ 0][ 1][ 2][ 3] [ 0][ 1][ 4][ 5] -// [ 4][ 5][ 6][ 7] <-> [ 8][ 9][12][13] -// [ 8][ 9][10][11] [ 2][ 3][ 6][ 7] -// [12][13][14][15] [10][11][14][15] - -// However, when extracting sub blocks from BGRA data the natural array -// indexing order ends up different: -// vertical0: [a][e][b][f] horizontal0: [a][e][i][m] -// [c][g][d][h] [b][f][j][n] -// vertical1: [i][m][j][n] horizontal1: [c][g][k][o] -// [k][o][l][p] [d][h][l][p] - -// In order to translate from the natural array indices in a sub block to the -// indices (number) used by specification and hardware we use this table. -static const uint8_t g_idx_to_num[4][8] = { - {0, 4, 1, 5, 2, 6, 3, 7}, // Vertical block 0. - {8, 12, 9, 13, 10, 14, 11, 15}, // Vertical block 1. - {0, 4, 8, 12, 1, 5, 9, 13}, // Horizontal block 0. - {2, 6, 10, 14, 3, 7, 11, 15} // Horizontal block 1. -}; - -inline void WriteColors444(uint8_t* block, - const Color& color0, - const Color& color1) { - // Write output color for BGRA textures. - block[0] = (color0.channels.r & 0xf0) | (color1.channels.r >> 4); - block[1] = (color0.channels.g & 0xf0) | (color1.channels.g >> 4); - block[2] = (color0.channels.b & 0xf0) | (color1.channels.b >> 4); -} - -inline void WriteColors555(uint8_t* block, - const Color& color0, - const Color& color1) { - // Table for conversion to 3-bit two complement format. 
- static const uint8_t two_compl_trans_table[8] = { - 4, // -4 (100b) - 5, // -3 (101b) - 6, // -2 (110b) - 7, // -1 (111b) - 0, // 0 (000b) - 1, // 1 (001b) - 2, // 2 (010b) - 3, // 3 (011b) - }; - - int16_t delta_r = - static_cast<int16_t>(color1.channels.r >> 3) - (color0.channels.r >> 3); - int16_t delta_g = - static_cast<int16_t>(color1.channels.g >> 3) - (color0.channels.g >> 3); - int16_t delta_b = - static_cast<int16_t>(color1.channels.b >> 3) - (color0.channels.b >> 3); - DCHECK_GE(delta_r, -4); - DCHECK_LE(delta_r, 3); - DCHECK_GE(delta_g, -4); - DCHECK_LE(delta_g, 3); - DCHECK_GE(delta_b, -4); - DCHECK_LE(delta_b, 3); - - // Write output color for BGRA textures. - block[0] = (color0.channels.r & 0xf8) | two_compl_trans_table[delta_r + 4]; - block[1] = (color0.channels.g & 0xf8) | two_compl_trans_table[delta_g + 4]; - block[2] = (color0.channels.b & 0xf8) | two_compl_trans_table[delta_b + 4]; -} - -inline void WriteCodewordTable(uint8_t* block, - uint8_t sub_block_id, - uint8_t table) { - DCHECK_LT(sub_block_id, 2); - DCHECK_LT(table, 8); - - uint8_t shift = (2 + (3 - sub_block_id * 3)); - block[3] &= ~(0x07 << shift); - block[3] |= table << shift; -} - -inline void WritePixelData(uint8_t* block, uint32_t pixel_data) { - block[4] |= pixel_data >> 24; - block[5] |= (pixel_data >> 16) & 0xff; - block[6] |= (pixel_data >> 8) & 0xff; - block[7] |= pixel_data & 0xff; -} - -inline void WriteFlip(uint8_t* block, bool flip) { - block[3] &= ~0x01; - block[3] |= static_cast<uint8_t>(flip); -} - -inline void WriteDiff(uint8_t* block, bool diff) { - block[3] &= ~0x02; - block[3] |= static_cast<uint8_t>(diff) << 1; -} - -// Compress and rounds BGR888 into BGR444. The resulting BGR444 color is -// expanded to BGR888 as it would be in hardware after decompression. The -// actual 444-bit data is available in the four most significant bits of each -// channel. 
-inline Color MakeColor444(const float* bgr) { - uint8_t b4 = round_to_4_bits(bgr[0]); - uint8_t g4 = round_to_4_bits(bgr[1]); - uint8_t r4 = round_to_4_bits(bgr[2]); - Color bgr444; - bgr444.channels.b = (b4 << 4) | b4; - bgr444.channels.g = (g4 << 4) | g4; - bgr444.channels.r = (r4 << 4) | r4; - // Added to distinguish between expanded 555 and 444 colors. - bgr444.channels.a = 0x44; - return bgr444; -} - -// Compress and rounds BGR888 into BGR555. The resulting BGR555 color is -// expanded to BGR888 as it would be in hardware after decompression. The -// actual 555-bit data is available in the five most significant bits of each -// channel. -inline Color MakeColor555(const float* bgr) { - uint8_t b5 = round_to_5_bits(bgr[0]); - uint8_t g5 = round_to_5_bits(bgr[1]); - uint8_t r5 = round_to_5_bits(bgr[2]); - Color bgr555; - bgr555.channels.b = (b5 << 3) | (b5 >> 2); - bgr555.channels.g = (g5 << 3) | (g5 >> 2); - bgr555.channels.r = (r5 << 3) | (r5 >> 2); - // Added to distinguish between expanded 555 and 444 colors. - bgr555.channels.a = 0x55; - return bgr555; -} - -class CC_EXPORT TextureCompressorETC1 : public TextureCompressor { - public: - TextureCompressorETC1() {} - - // Compress a texture using ETC1. Note that the |quality| parameter is - // ignored. The current implementation does not support different quality - // settings. - void Compress(const uint8_t* src, - uint8_t* dst, - int width, - int height, - Quality quality) override; - - private: - DISALLOW_COPY_AND_ASSIGN(TextureCompressorETC1); -}; - -} // namespace cc - -#endif // CC_RASTER_TEXTURE_COMPRESSOR_ETC1_H_ diff --git a/chromium/cc/raster/texture_compressor_etc1_sse.cc b/chromium/cc/raster/texture_compressor_etc1_sse.cc deleted file mode 100644 index f0936885d13..00000000000 --- a/chromium/cc/raster/texture_compressor_etc1_sse.cc +++ /dev/null @@ -1,818 +0,0 @@ -// Copyright 2015 The Chromium Authors. All rights reserved. 
-// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "cc/raster/texture_compressor_etc1_sse.h" - -#include <emmintrin.h> -#include <stdint.h> - -#include "base/compiler_specific.h" -#include "base/logging.h" -// Using this header for common functions such as Color handling -// and codeword table. -#include "cc/raster/texture_compressor_etc1.h" - -namespace cc { - -namespace { - -inline uint32_t SetETC1MaxError(uint32_t avg_error) { - // ETC1 codeword table is sorted in ascending order. - // Our algorithm will try to identify the index that generates the minimum - // error. - // The min error calculated during ComputeLuminance main loop will converge - // towards that value. - // We use this threshold to determine when it doesn't make sense to iterate - // further through the array. - return avg_error + avg_error / 2 + 384; -} - -struct __sse_data { - // This is used to store raw data. - uint8_t* block; - // This is used to store 8 bit packed values. - __m128i* packed; - // This is used to store 32 bit zero extended values into 4x4 arrays. - __m128i* blue; - __m128i* green; - __m128i* red; -}; - -inline __m128i AddAndClamp(const __m128i x, const __m128i y) { - static const __m128i color_max = _mm_set1_epi32(0xFF); - return _mm_max_epi16(_mm_setzero_si128(), - _mm_min_epi16(_mm_add_epi16(x, y), color_max)); -} - -inline __m128i GetColorErrorSSE(const __m128i x, const __m128i y) { - // Changed from _mm_mullo_epi32 (SSE4) to _mm_mullo_epi16 (SSE2). 
- __m128i ret = _mm_sub_epi16(x, y); - return _mm_mullo_epi16(ret, ret); -} - -inline __m128i AddChannelError(const __m128i x, - const __m128i y, - const __m128i z) { - return _mm_add_epi32(x, _mm_add_epi32(y, z)); -} - -inline uint32_t SumSSE(const __m128i x) { - __m128i sum = _mm_add_epi32(x, _mm_shuffle_epi32(x, 0x4E)); - sum = _mm_add_epi32(sum, _mm_shuffle_epi32(sum, 0xB1)); - - return _mm_cvtsi128_si32(sum); -} - -inline uint32_t GetVerticalError(const __sse_data* data, - const __m128i* blue_avg, - const __m128i* green_avg, - const __m128i* red_avg, - uint32_t* verror) { - __m128i error = _mm_setzero_si128(); - - for (int i = 0; i < 4; i++) { - error = _mm_add_epi32(error, GetColorErrorSSE(data->blue[i], blue_avg[0])); - error = - _mm_add_epi32(error, GetColorErrorSSE(data->green[i], green_avg[0])); - error = _mm_add_epi32(error, GetColorErrorSSE(data->red[i], red_avg[0])); - } - - error = _mm_add_epi32(error, _mm_shuffle_epi32(error, 0x4E)); - - verror[0] = _mm_cvtsi128_si32(error); - verror[1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(error, 0xB1)); - - return verror[0] + verror[1]; -} - -inline uint32_t GetHorizontalError(const __sse_data* data, - const __m128i* blue_avg, - const __m128i* green_avg, - const __m128i* red_avg, - uint32_t* verror) { - __m128i error = _mm_setzero_si128(); - int first_index, second_index; - - for (int i = 0; i < 2; i++) { - first_index = 2 * i; - second_index = first_index + 1; - - error = _mm_add_epi32( - error, GetColorErrorSSE(data->blue[first_index], blue_avg[i])); - error = _mm_add_epi32( - error, GetColorErrorSSE(data->blue[second_index], blue_avg[i])); - error = _mm_add_epi32( - error, GetColorErrorSSE(data->green[first_index], green_avg[i])); - error = _mm_add_epi32( - error, GetColorErrorSSE(data->green[second_index], green_avg[i])); - error = _mm_add_epi32(error, - GetColorErrorSSE(data->red[first_index], red_avg[i])); - error = _mm_add_epi32( - error, GetColorErrorSSE(data->red[second_index], red_avg[i])); - } - - error 
= _mm_add_epi32(error, _mm_shuffle_epi32(error, 0x4E)); - - verror[0] = _mm_cvtsi128_si32(error); - verror[1] = _mm_cvtsi128_si32(_mm_shuffle_epi32(error, 0xB1)); - - return verror[0] + verror[1]; -} - -inline void GetAvgColors(const __sse_data* data, - float* output, - bool* __sse_use_diff) { - __m128i sum[2], tmp; - - // TODO(radu.velea): _mm_avg_epu8 on packed data maybe. - - // Compute avg red value. - // [S0 S0 S1 S1] - sum[0] = _mm_add_epi32(data->red[0], data->red[1]); - sum[0] = _mm_add_epi32(sum[0], _mm_shuffle_epi32(sum[0], 0xB1)); - - // [S2 S2 S3 S3] - sum[1] = _mm_add_epi32(data->red[2], data->red[3]); - sum[1] = _mm_add_epi32(sum[1], _mm_shuffle_epi32(sum[1], 0xB1)); - - float hred[2], vred[2]; - hred[0] = (_mm_cvtsi128_si32( - _mm_add_epi32(sum[0], _mm_shuffle_epi32(sum[0], 0x4E)))) / - 8.0f; - hred[1] = (_mm_cvtsi128_si32( - _mm_add_epi32(sum[1], _mm_shuffle_epi32(sum[1], 0x4E)))) / - 8.0f; - - tmp = _mm_add_epi32(sum[0], sum[1]); - vred[0] = (_mm_cvtsi128_si32(tmp)) / 8.0f; - vred[1] = (_mm_cvtsi128_si32(_mm_shuffle_epi32(tmp, 0x2))) / 8.0f; - - // Compute avg green value. - // [S0 S0 S1 S1] - sum[0] = _mm_add_epi32(data->green[0], data->green[1]); - sum[0] = _mm_add_epi32(sum[0], _mm_shuffle_epi32(sum[0], 0xB1)); - - // [S2 S2 S3 S3] - sum[1] = _mm_add_epi32(data->green[2], data->green[3]); - sum[1] = _mm_add_epi32(sum[1], _mm_shuffle_epi32(sum[1], 0xB1)); - - float hgreen[2], vgreen[2]; - hgreen[0] = (_mm_cvtsi128_si32( - _mm_add_epi32(sum[0], _mm_shuffle_epi32(sum[0], 0x4E)))) / - 8.0f; - hgreen[1] = (_mm_cvtsi128_si32( - _mm_add_epi32(sum[1], _mm_shuffle_epi32(sum[1], 0x4E)))) / - 8.0f; - - tmp = _mm_add_epi32(sum[0], sum[1]); - vgreen[0] = (_mm_cvtsi128_si32(tmp)) / 8.0f; - vgreen[1] = (_mm_cvtsi128_si32(_mm_shuffle_epi32(tmp, 0x2))) / 8.0f; - - // Compute avg blue value. 
- // [S0 S0 S1 S1] - sum[0] = _mm_add_epi32(data->blue[0], data->blue[1]); - sum[0] = _mm_add_epi32(sum[0], _mm_shuffle_epi32(sum[0], 0xB1)); - - // [S2 S2 S3 S3] - sum[1] = _mm_add_epi32(data->blue[2], data->blue[3]); - sum[1] = _mm_add_epi32(sum[1], _mm_shuffle_epi32(sum[1], 0xB1)); - - float hblue[2], vblue[2]; - hblue[0] = (_mm_cvtsi128_si32( - _mm_add_epi32(sum[0], _mm_shuffle_epi32(sum[0], 0x4E)))) / - 8.0f; - hblue[1] = (_mm_cvtsi128_si32( - _mm_add_epi32(sum[1], _mm_shuffle_epi32(sum[1], 0x4E)))) / - 8.0f; - - tmp = _mm_add_epi32(sum[0], sum[1]); - vblue[0] = (_mm_cvtsi128_si32(tmp)) / 8.0f; - vblue[1] = (_mm_cvtsi128_si32(_mm_shuffle_epi32(tmp, 0x2))) / 8.0f; - - // TODO(radu.velea): Return int's instead of floats, based on Quality. - output[0] = vblue[0]; - output[1] = vgreen[0]; - output[2] = vred[0]; - - output[3] = vblue[1]; - output[4] = vgreen[1]; - output[5] = vred[1]; - - output[6] = hblue[0]; - output[7] = hgreen[0]; - output[8] = hred[0]; - - output[9] = hblue[1]; - output[10] = hgreen[1]; - output[11] = hred[1]; - - __m128i threshold_upper = _mm_set1_epi32(3); - __m128i threshold_lower = _mm_set1_epi32(-4); - - __m128 factor_v = _mm_set1_ps(31.0f / 255.0f); - __m128 rounding_v = _mm_set1_ps(0.5f); - __m128 h_avg_0 = _mm_set_ps(hblue[0], hgreen[0], hred[0], 0); - __m128 h_avg_1 = _mm_set_ps(hblue[1], hgreen[1], hred[1], 0); - - __m128 v_avg_0 = _mm_set_ps(vblue[0], vgreen[0], vred[0], 0); - __m128 v_avg_1 = _mm_set_ps(vblue[1], vgreen[1], vred[1], 0); - - h_avg_0 = _mm_mul_ps(h_avg_0, factor_v); - h_avg_1 = _mm_mul_ps(h_avg_1, factor_v); - v_avg_0 = _mm_mul_ps(v_avg_0, factor_v); - v_avg_1 = _mm_mul_ps(v_avg_1, factor_v); - - h_avg_0 = _mm_add_ps(h_avg_0, rounding_v); - h_avg_1 = _mm_add_ps(h_avg_1, rounding_v); - v_avg_0 = _mm_add_ps(v_avg_0, rounding_v); - v_avg_1 = _mm_add_ps(v_avg_1, rounding_v); - - __m128i h_avg_0i = _mm_cvttps_epi32(h_avg_0); - __m128i h_avg_1i = _mm_cvttps_epi32(h_avg_1); - - __m128i v_avg_0i = _mm_cvttps_epi32(v_avg_0); 
- __m128i v_avg_1i = _mm_cvttps_epi32(v_avg_1); - - h_avg_0i = _mm_sub_epi32(h_avg_1i, h_avg_0i); - v_avg_0i = _mm_sub_epi32(v_avg_1i, v_avg_0i); - - __sse_use_diff[0] = - (0 == _mm_movemask_epi8(_mm_cmplt_epi32(v_avg_0i, threshold_lower))); - __sse_use_diff[0] &= - (0 == _mm_movemask_epi8(_mm_cmpgt_epi32(v_avg_0i, threshold_upper))); - - __sse_use_diff[1] = - (0 == _mm_movemask_epi8(_mm_cmplt_epi32(h_avg_0i, threshold_lower))); - __sse_use_diff[1] &= - (0 == _mm_movemask_epi8(_mm_cmpgt_epi32(h_avg_0i, threshold_upper))); -} - -void ComputeLuminance(uint8_t* block, - const Color& base, - const int sub_block_id, - const uint8_t* idx_to_num_tab, - const __sse_data* data, - const uint32_t expected_error) { - uint8_t best_tbl_idx = 0; - uint32_t best_error = 0x7FFFFFFF; - uint8_t best_mod_idx[8][8]; // [table][texel] - - const __m128i base_blue = _mm_set1_epi32(base.channels.b); - const __m128i base_green = _mm_set1_epi32(base.channels.g); - const __m128i base_red = _mm_set1_epi32(base.channels.r); - - __m128i test_red, test_blue, test_green, tmp, tmp_blue, tmp_green, tmp_red; - __m128i block_error, mask; - - // This will have the minimum errors for each 4 pixels. - __m128i first_half_min; - __m128i second_half_min; - - // This will have the matching table index combo for each 4 pixels. - __m128i first_half_pattern; - __m128i second_half_pattern; - - const __m128i first_blue_data_block = data->blue[2 * sub_block_id]; - const __m128i first_green_data_block = data->green[2 * sub_block_id]; - const __m128i first_red_data_block = data->red[2 * sub_block_id]; - - const __m128i second_blue_data_block = data->blue[2 * sub_block_id + 1]; - const __m128i second_green_data_block = data->green[2 * sub_block_id + 1]; - const __m128i second_red_data_block = data->red[2 * sub_block_id + 1]; - - uint32_t min; - // Fail early to increase speed. 
- long delta = INT32_MAX; - uint32_t last_min = INT32_MAX; - - const uint8_t shuffle_mask[] = { - 0x1B, 0x4E, 0xB1, 0xE4}; // Important they are sorted ascending. - - for (unsigned int tbl_idx = 0; tbl_idx < 8; ++tbl_idx) { - tmp = _mm_set_epi32( - g_codeword_tables[tbl_idx][3], g_codeword_tables[tbl_idx][2], - g_codeword_tables[tbl_idx][1], g_codeword_tables[tbl_idx][0]); - - test_blue = AddAndClamp(tmp, base_blue); - test_green = AddAndClamp(tmp, base_green); - test_red = AddAndClamp(tmp, base_red); - - first_half_min = _mm_set1_epi32(0x7FFFFFFF); - second_half_min = _mm_set1_epi32(0x7FFFFFFF); - - first_half_pattern = _mm_setzero_si128(); - second_half_pattern = _mm_setzero_si128(); - - for (uint8_t imm8 : shuffle_mask) { - switch (imm8) { - case 0x1B: - tmp_blue = _mm_shuffle_epi32(test_blue, 0x1B); - tmp_green = _mm_shuffle_epi32(test_green, 0x1B); - tmp_red = _mm_shuffle_epi32(test_red, 0x1B); - break; - case 0x4E: - tmp_blue = _mm_shuffle_epi32(test_blue, 0x4E); - tmp_green = _mm_shuffle_epi32(test_green, 0x4E); - tmp_red = _mm_shuffle_epi32(test_red, 0x4E); - break; - case 0xB1: - tmp_blue = _mm_shuffle_epi32(test_blue, 0xB1); - tmp_green = _mm_shuffle_epi32(test_green, 0xB1); - tmp_red = _mm_shuffle_epi32(test_red, 0xB1); - break; - case 0xE4: - tmp_blue = _mm_shuffle_epi32(test_blue, 0xE4); - tmp_green = _mm_shuffle_epi32(test_green, 0xE4); - tmp_red = _mm_shuffle_epi32(test_red, 0xE4); - break; - default: - tmp_blue = test_blue; - tmp_green = test_green; - tmp_red = test_red; - } - - tmp = _mm_set1_epi32(imm8); - - block_error = - AddChannelError(GetColorErrorSSE(tmp_blue, first_blue_data_block), - GetColorErrorSSE(tmp_green, first_green_data_block), - GetColorErrorSSE(tmp_red, first_red_data_block)); - - // Save winning pattern. 
- first_half_pattern = _mm_max_epi16( - first_half_pattern, - _mm_and_si128(tmp, _mm_cmpgt_epi32(first_half_min, block_error))); - // Should use _mm_min_epi32(first_half_min, block_error); from SSE4 - // otherwise we have a small performance penalty. - mask = _mm_cmplt_epi32(block_error, first_half_min); - first_half_min = _mm_add_epi32(_mm_and_si128(mask, block_error), - _mm_andnot_si128(mask, first_half_min)); - - // Compute second part of the block. - block_error = - AddChannelError(GetColorErrorSSE(tmp_blue, second_blue_data_block), - GetColorErrorSSE(tmp_green, second_green_data_block), - GetColorErrorSSE(tmp_red, second_red_data_block)); - - // Save winning pattern. - second_half_pattern = _mm_max_epi16( - second_half_pattern, - _mm_and_si128(tmp, _mm_cmpgt_epi32(second_half_min, block_error))); - // Should use _mm_min_epi32(second_half_min, block_error); from SSE4 - // otherwise we have a small performance penalty. - mask = _mm_cmplt_epi32(block_error, second_half_min); - second_half_min = _mm_add_epi32(_mm_and_si128(mask, block_error), - _mm_andnot_si128(mask, second_half_min)); - } - - first_half_min = _mm_add_epi32(first_half_min, second_half_min); - first_half_min = - _mm_add_epi32(first_half_min, _mm_shuffle_epi32(first_half_min, 0x4E)); - first_half_min = - _mm_add_epi32(first_half_min, _mm_shuffle_epi32(first_half_min, 0xB1)); - - min = _mm_cvtsi128_si32(first_half_min); - - delta = min - last_min; - last_min = min; - - if (min < best_error) { - best_tbl_idx = tbl_idx; - best_error = min; - - best_mod_idx[tbl_idx][0] = - (_mm_cvtsi128_si32(first_half_pattern) >> (0)) & 3; - best_mod_idx[tbl_idx][4] = - (_mm_cvtsi128_si32(second_half_pattern) >> (0)) & 3; - - best_mod_idx[tbl_idx][1] = - (_mm_cvtsi128_si32(_mm_shuffle_epi32(first_half_pattern, 0x1)) >> - (2)) & - 3; - best_mod_idx[tbl_idx][5] = - (_mm_cvtsi128_si32(_mm_shuffle_epi32(second_half_pattern, 0x1)) >> - (2)) & - 3; - - best_mod_idx[tbl_idx][2] = - 
(_mm_cvtsi128_si32(_mm_shuffle_epi32(first_half_pattern, 0x2)) >> - (4)) & - 3; - best_mod_idx[tbl_idx][6] = - (_mm_cvtsi128_si32(_mm_shuffle_epi32(second_half_pattern, 0x2)) >> - (4)) & - 3; - - best_mod_idx[tbl_idx][3] = - (_mm_cvtsi128_si32(_mm_shuffle_epi32(first_half_pattern, 0x3)) >> - (6)) & - 3; - best_mod_idx[tbl_idx][7] = - (_mm_cvtsi128_si32(_mm_shuffle_epi32(second_half_pattern, 0x3)) >> - (6)) & - 3; - - if (best_error == 0) { - break; - } - } else if (delta > 0 && expected_error < min) { - // The error is growing and is well beyond expected threshold. - break; - } - } - - WriteCodewordTable(block, sub_block_id, best_tbl_idx); - - uint32_t pix_data = 0; - uint8_t mod_idx; - uint8_t pix_idx; - uint32_t lsb; - uint32_t msb; - int texel_num; - - for (unsigned int i = 0; i < 8; ++i) { - mod_idx = best_mod_idx[best_tbl_idx][i]; - pix_idx = g_mod_to_pix[mod_idx]; - - lsb = pix_idx & 0x1; - msb = pix_idx >> 1; - - // Obtain the texel number as specified in the standard. - texel_num = idx_to_num_tab[i]; - pix_data |= msb << (texel_num + 16); - pix_data |= lsb << (texel_num); - } - - WritePixelData(block, pix_data); -} - -void CompressBlock(uint8_t* dst, __sse_data* data) { - // First 3 values are for vertical 1, second 3 vertical 2, third 3 horizontal - // 1, last 3 - // horizontal 2. - float __sse_avg_colors[12] = { - 0, - }; - bool use_differential[2] = {true, true}; - GetAvgColors(data, __sse_avg_colors, use_differential); - Color sub_block_avg[4]; - - // TODO(radu.velea): Remove floating point operations and use only int's + - // normal rounding and shifts for reduced Quality. 
- for (int i = 0, j = 1; i < 4; i += 2, j += 2) { - if (use_differential[i / 2] == false) { - sub_block_avg[i] = MakeColor444(&__sse_avg_colors[i * 3]); - sub_block_avg[j] = MakeColor444(&__sse_avg_colors[j * 3]); - } else { - sub_block_avg[i] = MakeColor555(&__sse_avg_colors[i * 3]); - sub_block_avg[j] = MakeColor555(&__sse_avg_colors[j * 3]); - } - } - - __m128i red_avg[2], green_avg[2], blue_avg[2]; - - // TODO(radu.velea): Perfect accuracy, maybe skip floating variables. - blue_avg[0] = _mm_set_epi32(static_cast<int>(__sse_avg_colors[3]), - static_cast<int>(__sse_avg_colors[3]), - static_cast<int>(__sse_avg_colors[0]), - static_cast<int>(__sse_avg_colors[0])); - - green_avg[0] = _mm_set_epi32(static_cast<int>(__sse_avg_colors[4]), - static_cast<int>(__sse_avg_colors[4]), - static_cast<int>(__sse_avg_colors[1]), - static_cast<int>(__sse_avg_colors[1])); - - red_avg[0] = _mm_set_epi32(static_cast<int>(__sse_avg_colors[5]), - static_cast<int>(__sse_avg_colors[5]), - static_cast<int>(__sse_avg_colors[2]), - static_cast<int>(__sse_avg_colors[2])); - - uint32_t vertical_error[2]; - GetVerticalError(data, blue_avg, green_avg, red_avg, vertical_error); - - // TODO(radu.velea): Perfect accuracy, maybe skip floating variables. - blue_avg[0] = _mm_set1_epi32(static_cast<int>(__sse_avg_colors[6])); - blue_avg[1] = _mm_set1_epi32(static_cast<int>(__sse_avg_colors[9])); - - green_avg[0] = _mm_set1_epi32(static_cast<int>(__sse_avg_colors[7])); - green_avg[1] = _mm_set1_epi32(static_cast<int>(__sse_avg_colors[10])); - - red_avg[0] = _mm_set1_epi32(static_cast<int>(__sse_avg_colors[8])); - red_avg[1] = _mm_set1_epi32(static_cast<int>(__sse_avg_colors[11])); - - uint32_t horizontal_error[2]; - GetHorizontalError(data, blue_avg, green_avg, red_avg, horizontal_error); - - bool flip = (horizontal_error[0] + horizontal_error[1]) < - (vertical_error[0] + vertical_error[1]); - uint32_t* expected_errors = flip ? 
horizontal_error : vertical_error; - - // Clear destination buffer so that we can "or" in the results. - memset(dst, 0, 8); - - WriteDiff(dst, use_differential[!!flip]); - WriteFlip(dst, flip); - - uint8_t sub_block_off_0 = flip ? 2 : 0; - uint8_t sub_block_off_1 = sub_block_off_0 + 1; - - if (use_differential[!!flip]) { - WriteColors555(dst, sub_block_avg[sub_block_off_0], - sub_block_avg[sub_block_off_1]); - } else { - WriteColors444(dst, sub_block_avg[sub_block_off_0], - sub_block_avg[sub_block_off_1]); - } - - if (!flip) { - // Transpose vertical data into horizontal lines. - __m128i tmp; - for (int i = 0; i < 4; i += 2) { - tmp = data->blue[i]; - data->blue[i] = _mm_add_epi32( - _mm_move_epi64(data->blue[i]), - _mm_shuffle_epi32(_mm_move_epi64(data->blue[i + 1]), 0x4E)); - data->blue[i + 1] = _mm_add_epi32( - _mm_move_epi64(_mm_shuffle_epi32(tmp, 0x4E)), - _mm_shuffle_epi32( - _mm_move_epi64(_mm_shuffle_epi32(data->blue[i + 1], 0x4E)), - 0x4E)); - - tmp = data->green[i]; - data->green[i] = _mm_add_epi32( - _mm_move_epi64(data->green[i]), - _mm_shuffle_epi32(_mm_move_epi64(data->green[i + 1]), 0x4E)); - data->green[i + 1] = _mm_add_epi32( - _mm_move_epi64(_mm_shuffle_epi32(tmp, 0x4E)), - _mm_shuffle_epi32( - _mm_move_epi64(_mm_shuffle_epi32(data->green[i + 1], 0x4E)), - 0x4E)); - - tmp = data->red[i]; - data->red[i] = _mm_add_epi32( - _mm_move_epi64(data->red[i]), - _mm_shuffle_epi32(_mm_move_epi64(data->red[i + 1]), 0x4E)); - data->red[i + 1] = _mm_add_epi32( - _mm_move_epi64(_mm_shuffle_epi32(tmp, 0x4E)), - _mm_shuffle_epi32( - _mm_move_epi64(_mm_shuffle_epi32(data->red[i + 1], 0x4E)), 0x4E)); - } - - tmp = data->blue[1]; - data->blue[1] = data->blue[2]; - data->blue[2] = tmp; - - tmp = data->green[1]; - data->green[1] = data->green[2]; - data->green[2] = tmp; - - tmp = data->red[1]; - data->red[1] = data->red[2]; - data->red[2] = tmp; - } - - // Compute luminance for the first sub block. 
- ComputeLuminance(dst, sub_block_avg[sub_block_off_0], 0, - g_idx_to_num[sub_block_off_0], data, - SetETC1MaxError(expected_errors[0])); - // Compute luminance for the second sub block. - ComputeLuminance(dst, sub_block_avg[sub_block_off_1], 1, - g_idx_to_num[sub_block_off_1], data, - SetETC1MaxError(expected_errors[1])); -} - -static void ExtractBlock(uint8_t* dst, const uint8_t* src, int width) { - for (int j = 0; j < 4; ++j) { - memcpy(&dst[j * 4 * 4], src, 4 * 4); - src += width * 4; - } -} - -inline bool TransposeBlock(uint8_t* block, __m128i* transposed) { - // This function transforms an incommig block of RGBA or GBRA pixels into 4 - // registers, each containing the data corresponding for a single channel. - // Ex: transposed[0] will have all the R values for a RGBA block, - // transposed[1] will have G, etc. - // The values are packed as 8 bit unsigned values in the SSE registers. - - // Before doing any work we check if the block is solid. - __m128i tmp3, tmp2, tmp1, tmp0; - __m128i test_solid = _mm_set1_epi32(*((uint32_t*)block)); - uint16_t mask = 0xFFFF; - - // a0,a1,a2,...a7, ...a15 - transposed[0] = _mm_loadu_si128((__m128i*)(block)); - // b0, b1,b2,...b7.... b15 - transposed[1] = _mm_loadu_si128((__m128i*)(block + 16)); - // c0, c1,c2,...c7....c15 - transposed[2] = _mm_loadu_si128((__m128i*)(block + 32)); - // d0,d1,d2,...d7....d15 - transposed[3] = _mm_loadu_si128((__m128i*)(block + 48)); - - for (int i = 0; i < 4; i++) { - mask &= _mm_movemask_epi8(_mm_cmpeq_epi8(transposed[i], test_solid)); - } - - if (mask == 0xFFFF) { - // Block is solid, no need to do any more work. - return false; - } - - // a0,b0, a1,b1, a2,b2, a3,b3,....a7,b7 - tmp0 = _mm_unpacklo_epi8(transposed[0], transposed[1]); - // c0,d0, c1,d1, c2,d2, c3,d3,... 
c7,d7 - tmp1 = _mm_unpacklo_epi8(transposed[2], transposed[3]); - // a8,b8, a9,b9, a10,b10, a11,b11,...a15,b15 - tmp2 = _mm_unpackhi_epi8(transposed[0], transposed[1]); - // c8,d8, c9,d9, c10,d10, c11,d11,...c15,d15 - tmp3 = _mm_unpackhi_epi8(transposed[2], transposed[3]); - - // a0,a8, b0,b8, a1,a9, b1,b9, ....a3,a11, b3,b11 - transposed[0] = _mm_unpacklo_epi8(tmp0, tmp2); - // a4,a12, b4,b12, a5,a13, b5,b13,....a7,a15,b7,b15 - transposed[1] = _mm_unpackhi_epi8(tmp0, tmp2); - // c0,c8, d0,d8, c1,c9, d1,d9.....d3,d11 - transposed[2] = _mm_unpacklo_epi8(tmp1, tmp3); - // c4,c12,d4,d12, c5,c13, d5,d13,....d7,d15 - transposed[3] = _mm_unpackhi_epi8(tmp1, tmp3); - - // a0,a8, b0,b8, c0,c8, d0,d8, a1,a9, b1,b9, c1,c9, d1,d9 - tmp0 = _mm_unpacklo_epi32(transposed[0], transposed[2]); - // a2,a10, b2,b10, c2,c10, d2,d10, a3,a11, b3,b11, c3,c11, d3,d11 - tmp1 = _mm_unpackhi_epi32(transposed[0], transposed[2]); - // a4,a12, b4,b12, c4,c12, d4,d12, a5,a13, b5,b13, c5,c13, d5,d13 - tmp2 = _mm_unpacklo_epi32(transposed[1], transposed[3]); - // a6,a14, b6,b14, c6,c14, d6,d14, a7,a15, b7,b15, c7,c15, d7,d15 - tmp3 = _mm_unpackhi_epi32(transposed[1], transposed[3]); - - // a0,a4, a8,a12, b0,b4, b8,b12, c0,c4, c8,c12, d0,d4, d8,d12 - transposed[0] = _mm_unpacklo_epi8(tmp0, tmp2); - // a1,a5, a9,a13, b1,b5, b9,b13, c1,c5, c9,c13, d1,d5, d9,d13 - transposed[1] = _mm_unpackhi_epi8(tmp0, tmp2); - // a2,a6, a10,a14, b2,b6, b10,b14, c2,c6, c10,c14, d2,d6, d10,d14 - transposed[2] = _mm_unpacklo_epi8(tmp1, tmp3); - // a3,a7, a11,a15, b3,b7, b11,b15, c3,c7, c11,c15, d3,d7, d11,d15 - transposed[3] = _mm_unpackhi_epi8(tmp1, tmp3); - - return true; -} - -inline void UnpackBlock(__m128i* packed, - __m128i* red, - __m128i* green, - __m128i* blue, - __m128i* alpha) { - const __m128i zero = _mm_set1_epi8(0); - __m128i tmp_low, tmp_high; - - // Unpack red. 
- tmp_low = _mm_unpacklo_epi8(packed[0], zero); - tmp_high = _mm_unpackhi_epi8(packed[0], zero); - - red[0] = _mm_unpacklo_epi16(tmp_low, zero); - red[1] = _mm_unpackhi_epi16(tmp_low, zero); - - red[2] = _mm_unpacklo_epi16(tmp_high, zero); - red[3] = _mm_unpackhi_epi16(tmp_high, zero); - - // Unpack green. - tmp_low = _mm_unpacklo_epi8(packed[1], zero); - tmp_high = _mm_unpackhi_epi8(packed[1], zero); - - green[0] = _mm_unpacklo_epi16(tmp_low, zero); - green[1] = _mm_unpackhi_epi16(tmp_low, zero); - - green[2] = _mm_unpacklo_epi16(tmp_high, zero); - green[3] = _mm_unpackhi_epi16(tmp_high, zero); - - // Unpack blue. - tmp_low = _mm_unpacklo_epi8(packed[2], zero); - tmp_high = _mm_unpackhi_epi8(packed[2], zero); - - blue[0] = _mm_unpacklo_epi16(tmp_low, zero); - blue[1] = _mm_unpackhi_epi16(tmp_low, zero); - - blue[2] = _mm_unpacklo_epi16(tmp_high, zero); - blue[3] = _mm_unpackhi_epi16(tmp_high, zero); - - // Unpack alpha - unused for ETC1. - tmp_low = _mm_unpacklo_epi8(packed[3], zero); - tmp_high = _mm_unpackhi_epi8(packed[3], zero); - - alpha[0] = _mm_unpacklo_epi16(tmp_low, zero); - alpha[1] = _mm_unpackhi_epi16(tmp_low, zero); - - alpha[2] = _mm_unpacklo_epi16(tmp_high, zero); - alpha[3] = _mm_unpackhi_epi16(tmp_high, zero); -} - -inline void CompressSolid(uint8_t* dst, uint8_t* block) { - // Clear destination buffer so that we can "or" in the results. 
- memset(dst, 0, 8); - - const float src_color_float[3] = {static_cast<float>(block[0]), - static_cast<float>(block[1]), - static_cast<float>(block[2])}; - const Color base = MakeColor555(src_color_float); - const __m128i base_v = - _mm_set_epi32(0, base.channels.r, base.channels.g, base.channels.b); - - const __m128i constant = _mm_set_epi32(0, block[2], block[1], block[0]); - __m128i lum; - __m128i colors[4]; - static const __m128i rgb = - _mm_set_epi32(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); - - WriteDiff(dst, true); - WriteFlip(dst, false); - - WriteColors555(dst, base, base); - - uint8_t best_tbl_idx = 0; - uint8_t best_mod_idx = 0; - uint32_t best_mod_err = INT32_MAX; - - for (unsigned int tbl_idx = 0; tbl_idx < 8; ++tbl_idx) { - lum = _mm_set_epi32( - g_codeword_tables[tbl_idx][3], g_codeword_tables[tbl_idx][2], - g_codeword_tables[tbl_idx][1], g_codeword_tables[tbl_idx][0]); - colors[0] = AddAndClamp(base_v, _mm_shuffle_epi32(lum, 0x0)); - colors[1] = AddAndClamp(base_v, _mm_shuffle_epi32(lum, 0x55)); - colors[2] = AddAndClamp(base_v, _mm_shuffle_epi32(lum, 0xAA)); - colors[3] = AddAndClamp(base_v, _mm_shuffle_epi32(lum, 0xFF)); - - for (int i = 0; i < 4; i++) { - uint32_t mod_err = - SumSSE(GetColorErrorSSE(constant, _mm_and_si128(colors[i], rgb))); - colors[i] = _mm_and_si128(colors[i], rgb); - if (mod_err < best_mod_err) { - best_tbl_idx = tbl_idx; - best_mod_idx = i; - best_mod_err = mod_err; - - if (mod_err == 0) { - break; // We cannot do any better than this. - } - } - } - } - - WriteCodewordTable(dst, 0, best_tbl_idx); - WriteCodewordTable(dst, 1, best_tbl_idx); - - uint8_t pix_idx = g_mod_to_pix[best_mod_idx]; - uint32_t lsb = pix_idx & 0x1; - uint32_t msb = pix_idx >> 1; - - uint32_t pix_data = 0; - for (unsigned int i = 0; i < 2; ++i) { - for (unsigned int j = 0; j < 8; ++j) { - // Obtain the texel number as specified in the standard. 
- int texel_num = g_idx_to_num[i][j]; - pix_data |= msb << (texel_num + 16); - pix_data |= lsb << (texel_num); - } - } - - WritePixelData(dst, pix_data); -} - -} // namespace - -void TextureCompressorETC1SSE::Compress(const uint8_t* src, - uint8_t* dst, - int width, - int height, - Quality quality) { - DCHECK_GE(width, 4); - DCHECK_EQ((width & 3), 0); - DCHECK_GE(height, 4); - DCHECK_EQ((height & 3), 0); - - alignas(16) uint8_t block[64]; - __m128i packed[4]; - __m128i red[4], green[4], blue[4], alpha[4]; - __sse_data data; - - for (int y = 0; y < height; y += 4, src += width * 4 * 4) { - for (int x = 0; x < width; x += 4, dst += 8) { - ExtractBlock(block, src + x * 4, width); - if (TransposeBlock(block, packed) == false) { - CompressSolid(dst, block); - } else { - UnpackBlock(packed, blue, green, red, alpha); - - data.block = block; - data.packed = packed; - data.red = red; - data.blue = blue; - data.green = green; - - CompressBlock(dst, &data); - } - } - } -} - -} // namespace cc diff --git a/chromium/cc/raster/texture_compressor_etc1_sse.h b/chromium/cc/raster/texture_compressor_etc1_sse.h deleted file mode 100644 index 3c186c63ce7..00000000000 --- a/chromium/cc/raster/texture_compressor_etc1_sse.h +++ /dev/null @@ -1,34 +0,0 @@ -// Copyright 2015 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#ifndef CC_RASTER_TEXTURE_COMPRESSOR_ETC1_SSE_H_ -#define CC_RASTER_TEXTURE_COMPRESSOR_ETC1_SSE_H_ - -#include <stdint.h> - -#include "base/macros.h" -#include "cc/raster/texture_compressor.h" - -namespace cc { - -class CC_EXPORT TextureCompressorETC1SSE : public TextureCompressor { - public: - TextureCompressorETC1SSE() {} - - // Compress a texture using ETC1. Note that the |quality| parameter is - // ignored. The current implementation does not support different quality - // settings. 
- void Compress(const uint8_t* src, - uint8_t* dst, - int width, - int height, - Quality quality) override; - - private: - DISALLOW_COPY_AND_ASSIGN(TextureCompressorETC1SSE); -}; - -} // namespace cc - -#endif // CC_RASTER_TEXTURE_COMPRESSOR_ETC1_SSE_H_ diff --git a/chromium/cc/raster/texture_compressor_etc1_unittest.cc b/chromium/cc/raster/texture_compressor_etc1_unittest.cc deleted file mode 100644 index 4857b692612..00000000000 --- a/chromium/cc/raster/texture_compressor_etc1_unittest.cc +++ /dev/null @@ -1,57 +0,0 @@ -// Copyright 2015 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include "cc/raster/texture_compressor.h" - -#include <stdint.h> - -#include "testing/gtest/include/gtest/gtest.h" - -namespace cc { -namespace { - -const int kImageWidth = 256; -const int kImageHeight = 256; -const int kImageChannels = 4; -const int kImageSizeInBytes = kImageWidth * kImageHeight * kImageChannels; - -TEST(TextureCompressorETC1Test, Compress256x256Ratio) { - std::unique_ptr<TextureCompressor> compressor = - TextureCompressor::Create(TextureCompressor::kFormatETC1); - uint8_t src[kImageSizeInBytes]; - uint8_t dst[kImageSizeInBytes]; - const unsigned int kImagePoison = 0xDEADBEEF; - - // Poison destination bytes so we can see how much has been - // overwritten by compression algorithm. - uint32_t* dst_32 = reinterpret_cast<uint32_t*>(dst); - for (int i = 0; i < kImageWidth * kImageHeight; i++) { - dst_32[i] = kImagePoison; - } - - // Generate test texture. - for (int i = 0; i < kImageSizeInBytes; i++) { - src[i] = i % 256; - } - - compressor->Compress(src, dst, kImageWidth, kImageHeight, - TextureCompressor::kQualityLow); - - int compressed_size = 0; - for (compressed_size = 0; compressed_size < kImageWidth * kImageHeight; - compressed_size++) { - if (dst_32[compressed_size] == kImagePoison) { - // Represents size in bytes of the compressed block. 
- compressed_size = compressed_size * 4; - break; - } - } - - // Check if compression ratio is 8:1 for RGBA or BGRA images, after discarding - // alpha channel. - EXPECT_EQ(kImageSizeInBytes, compressed_size * 8); -} - -} // namespace -} // namespace cc diff --git a/chromium/cc/raster/texture_compressor_perftest.cc b/chromium/cc/raster/texture_compressor_perftest.cc deleted file mode 100644 index e444c30b33f..00000000000 --- a/chromium/cc/raster/texture_compressor_perftest.cc +++ /dev/null @@ -1,123 +0,0 @@ -// Copyright 2015 The Chromium Authors. All rights reserved. -// Use of this source code is governed by a BSD-style license that can be -// found in the LICENSE file. - -#include <stdint.h> - -#include "base/logging.h" -#include "cc/base/lap_timer.h" -#include "cc/raster/texture_compressor.h" -#include "testing/gtest/include/gtest/gtest.h" -#include "testing/perf/perf_test.h" - -namespace cc { -namespace { - -const int kTimeLimitMillis = 2000; -const int kWarmupRuns = 5; -const int kTimeCheckInterval = 10; - -const int kImageWidth = 256; -const int kImageHeight = 256; -const int kImageChannels = 4; -const int kImageSizeInBytes = kImageWidth * kImageHeight * kImageChannels; - -std::string FormatName(TextureCompressor::Format format) { - switch (format) { - case TextureCompressor::kFormatETC1: - return "ETC1"; - } - - NOTREACHED(); - return ""; -} - -std::string QualityName(TextureCompressor::Quality quality) { - switch (quality) { - case TextureCompressor::kQualityLow: - return "Low"; - case TextureCompressor::kQualityMedium: - return "Medium"; - case TextureCompressor::kQualityHigh: - return "High"; - } - - NOTREACHED(); - return ""; -} - -class TextureCompressorPerfTest - : public testing::TestWithParam< - ::testing::tuple<TextureCompressor::Quality, - TextureCompressor::Format>> { - public: - TextureCompressorPerfTest() - : timer_(kWarmupRuns, - base::TimeDelta::FromMilliseconds(kTimeLimitMillis), - kTimeCheckInterval) {} - - void SetUp() override { - 
TextureCompressor::Format format = ::testing::get<1>(GetParam()); - compressor_ = TextureCompressor::Create(format); - } - - void RunTest(const std::string& name) { - TextureCompressor::Quality quality = ::testing::get<0>(GetParam()); - timer_.Reset(); - do { - compressor_->Compress(src_, dst_, kImageWidth, kImageHeight, quality); - timer_.NextLap(); - } while (!timer_.HasTimeLimitExpired()); - - TextureCompressor::Format format = ::testing::get<1>(GetParam()); - std::string str = FormatName(format) + " " + QualityName(quality); - perf_test::PrintResult("Compress256x256", name, str, timer_.MsPerLap(), - "us", true); - } - - protected: - LapTimer timer_; - std::unique_ptr<TextureCompressor> compressor_; - uint8_t src_[kImageSizeInBytes]; - uint8_t dst_[kImageSizeInBytes]; -}; - -TEST_P(TextureCompressorPerfTest, Compress256x256BlackAndWhiteGradientImage) { - for (int i = 0; i < kImageSizeInBytes; ++i) - src_[i] = i % 256; - - RunTest("BlackAndWhiteGradientImage"); -} - -TEST_P(TextureCompressorPerfTest, Compress256x256SolidBlackImage) { - memset(src_, 0, kImageSizeInBytes); - - RunTest("SolidBlackImage"); -} - -TEST_P(TextureCompressorPerfTest, Compress256x256SolidColorImage) { - for (int i = 0; i < kImageSizeInBytes; ++i) - src_[i] = (4 - i % 4) * 50; - - RunTest("SolidColorImage"); -} - -TEST_P(TextureCompressorPerfTest, Compress256x256RandomColorImage) { - unsigned int kImageSeed = 1234567890; - srand(kImageSeed); - for (int i = 0; i < kImageSizeInBytes; ++i) - src_[i] = rand() % 256; // NOLINT - - RunTest("RandomColorImage"); -} - -INSTANTIATE_TEST_CASE_P( - TextureCompressorPerfTests, - TextureCompressorPerfTest, - ::testing::Combine(::testing::Values(TextureCompressor::kQualityLow, - TextureCompressor::kQualityMedium, - TextureCompressor::kQualityHigh), - ::testing::Values(TextureCompressor::kFormatETC1))); - -} // namespace -} // namespace cc diff --git a/chromium/cc/raster/zero_copy_raster_buffer_provider.cc 
b/chromium/cc/raster/zero_copy_raster_buffer_provider.cc index 037e8a366ce..21010d76efd 100644 --- a/chromium/cc/raster/zero_copy_raster_buffer_provider.cc +++ b/chromium/cc/raster/zero_copy_raster_buffer_provider.cc @@ -271,4 +271,8 @@ uint64_t ZeroCopyRasterBufferProvider::SetReadyToDrawCallback( void ZeroCopyRasterBufferProvider::Shutdown() {} +bool ZeroCopyRasterBufferProvider::CheckRasterFinishedQueries() { + return false; +} + } // namespace cc diff --git a/chromium/cc/raster/zero_copy_raster_buffer_provider.h b/chromium/cc/raster/zero_copy_raster_buffer_provider.h index 68f78bfb2c7..106f7c9bbc7 100644 --- a/chromium/cc/raster/zero_copy_raster_buffer_provider.h +++ b/chromium/cc/raster/zero_copy_raster_buffer_provider.h @@ -49,6 +49,7 @@ class CC_EXPORT ZeroCopyRasterBufferProvider : public RasterBufferProvider { const base::Closure& callback, uint64_t pending_callback_id) const override; void Shutdown() override; + bool CheckRasterFinishedQueries() override; private: std::unique_ptr<base::trace_event::ConvertableToTraceFormat> StateAsValue() |