Mirror of https://github.com/JetBrains/JetBrainsRuntime.git (synced 2025-12-06 09:29:38 +01:00)
JBR-7725 Vulkan: low performance in SwingMark

Removed extra synchronization in blits and an extra copy.

(cherry picked from commit 5411f2a1df5e6a6ed60c27837b414799b80d6fa5)
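The diff below applies two patterns. First, the blocking VKRenderer_Sync() plus immediate VKBuffer_Destroy / texture release on every blit is replaced by VKRenderer_DisposeOnCleanup, which parks the resource in a cleanup queue that the renderer drains once the submitted GPU batch has completed. Second, the malloc/memcpy staging copy is replaced by VKBuffer_CreateFromRaster, which writes raster rows straight into mapped buffer memory. A minimal, self-contained sketch of the deferred-cleanup idea (all names here are illustrative stand-ins, not the JBR API):

/*
 * Sketch only: CleanupQueue, queue_push and queue_drain are hypothetical
 * names modeling VKRenderer_DisposeOnCleanup / VKRenderer_CleanupPendingResources.
 */
#include <stdio.h>
#include <stdlib.h>

typedef void (*CleanupHandler)(void* data);

typedef struct CleanupEntry {
    CleanupHandler handler;
    void* data;
    struct CleanupEntry* next;
} CleanupEntry;

typedef struct {
    CleanupEntry* head;
} CleanupQueue;

/* Analogue of VKRenderer_DisposeOnCleanup: defer, don't destroy now. */
static void queue_push(CleanupQueue* q, CleanupHandler handler, void* data) {
    CleanupEntry* e = malloc(sizeof(CleanupEntry)); /* unchecked for brevity */
    e->handler = handler;
    e->data = data;
    e->next = q->head;
    q->head = e;
}

/* Analogue of the cleanup pass: runs after the GPU finished the batch,
 * so releasing here cannot race with in-flight work. */
static void queue_drain(CleanupQueue* q) {
    while (q->head != NULL) {
        CleanupEntry* e = q->head;
        q->head = e->next;
        e->handler(e->data);
        free(e);
    }
}

static void free_staging_buffer(void* data) {
    printf("freeing staging buffer %s\n", (const char*) data);
}

int main(void) {
    CleanupQueue q = { NULL };
    /* During a blit: record commands, then defer the release...          */
    queue_push(&q, free_staging_buffer, "rasterBuffer");
    /* ...with no blocking sync on the hot path anymore.                  */

    /* Later, when the renderer observes batch completion:                */
    queue_drain(&q);
    return 0;
}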
@@ -95,6 +95,11 @@ static AlphaType getSrcAlphaType(jshort srctype) {
            ALPHA_TYPE_PRE_MULTIPLIED : ALPHA_TYPE_STRAIGHT;
 }
 
+static void VKTexturePoolTexture_Dispose(VKDevice* device, void* ctx) {
+    VKTexturePoolHandle* hnd = (VKTexturePoolHandle*) ctx;
+    VKTexturePoolHandle_ReleaseTexture(hnd);
+}
+
 static void VKBlitSwToTextureViaPooledTexture(VKRenderingContext* context,
                                               VKSDOps *dstOps,
                                               const SurfaceDataRasInfo *srcInfo, jshort srctype, jint hint,
@@ -104,8 +109,6 @@ static void VKBlitSwToTextureViaPooledTexture(VKRenderingContext* context,
 
     const int sw = srcInfo->bounds.x2 - srcInfo->bounds.x1;
     const int sh = srcInfo->bounds.y2 - srcInfo->bounds.y1;
-    const int dw = dx2 - dx1;
-    const int dh = dy2 - dy1;
 
     ARRAY(VKTxVertex) vertices = ARRAY_ALLOC(VKTxVertex, 4);
     /*
@@ -129,19 +132,18 @@ static void VKBlitSwToTextureViaPooledTexture(VKRenderingContext* context,
     VKBuffer* renderVertexBuffer = ARRAY_TO_VERTEX_BUF(device, vertices);
     ARRAY_FREE(vertices);
 
-    const char *raster = srcInfo->rasBase;
-    raster += (uint32_t)srcInfo->bounds.y1 * (uint32_t)srcInfo->scanStride + (uint32_t)srcInfo->bounds.x1 * (uint32_t)srcInfo->pixelStride;
     J2dTraceLn4(J2D_TRACE_VERBOSE, "replaceTextureRegion src (dw, dh) : [%d, %d] dest (dx1, dy1) =[%d, %d]",
-                dw, dh, dx1, dy1);
-    uint32_t dataSize = sw * sh * srcInfo->pixelStride;
-    char* data = malloc(dataSize);
-    // copy src pixels inside src bounds to buff
-    for (int row = 0; row < sh; row++) {
-        memcpy(data + (row * sw * srcInfo->pixelStride), raster, sw * srcInfo->pixelStride);
-        raster += (uint32_t)srcInfo->scanStride;
-    }
-    VKBuffer *buffer = VKBuffer_CreateFromData(device, data, dataSize);
-    free(data);
+                (dx2 - dx1), (dy2 - dy1), dx1, dy1);
+    VKBuffer *buffer =
+            VKBuffer_CreateFromRaster(device, (VKBuffer_RasterInfo){
+                    .data = srcInfo->rasBase,
+                    .x1 = srcInfo->bounds.x1,
+                    .y1 = srcInfo->bounds.y1,
+                    .w = sw,
+                    .h = sh,
+                    .pixelStride = srcInfo->pixelStride,
+                    .scanStride = srcInfo->scanStride
+            }, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_ACCESS_TRANSFER_READ_BIT);
 
     VkCommandBuffer cb = VKRenderer_Record(device->renderer);
     {
@@ -181,15 +183,9 @@ static void VKBlitSwToTextureViaPooledTexture(VKRenderingContext* context,
         VkDescriptorSet srcDescriptorSet = VKImage_GetDescriptorSet(device, src, type.format, type.swizzle);
         VKRenderer_TextureRender(srcDescriptorSet, renderVertexBuffer->handle, 4, hint, SAMPLER_WRAP_BORDER);
 
-        // TODO: Not optimal but required for releasing raster buffer. Such Buffers should also be managed by special pools
         VKRenderer_FlushSurface(dstOps);
-        VKRenderer_Flush(device->renderer);
-        VKRenderer_Sync(device->renderer);
-        // TODO: Track lifecycle of the texture to avoid reuse of occupied texture
-        VKTexturePoolHandle_ReleaseTexture(hnd);
-        VKBuffer_Destroy(device, buffer);
-        // TODO: Add proper sync for renderVertexBuffer
-        // VKBuffer_Destroy(device, renderVertexBuffer);
+        VKRenderer_DisposeOnCleanup(device->renderer, VKTexturePoolTexture_Dispose, hnd);
+        VKRenderer_DisposeOnCleanup(device->renderer, VKBuffer_Dispose, buffer);
     }
 
 static void VKBlitTextureToTexture(VKRenderingContext* context, VKImage* src, VkBool32 srcOpaque, jint hint,
@@ -246,12 +242,8 @@ static void VKBlitTextureToTexture(VKRenderingContext* context, VKImage* src, VkBool32 srcOpaque, jint hint,
     VkDescriptorSet srcDescriptorSet = VKImage_GetDescriptorSet(device, src, src->format, srcOpaque ? OPAQUE_SWIZZLE : 0);
     VKRenderer_TextureRender(srcDescriptorSet, renderVertexBuffer->handle, 4, hint, SAMPLER_WRAP_BORDER);
 
-    // TODO: Not optimal but required for releasing raster buffer. Such Buffers should also be managed by special pools
-    // TODO: Also, consider using VKRenderer_FlushRenderPass here to process pending command
-    VKRenderer_Flush(device->renderer);
-    VKRenderer_Sync(device->renderer);
-    // TODO: Add proper sync for renderVertexBuffer
-    // VKBuffer_Destroy(device, renderVertexBuffer);
+    VKRenderer_FlushSurface(context->surface);
+    VKRenderer_DisposeOnCleanup(device->renderer, VKBuffer_Dispose, renderVertexBuffer);
 }
 
 static jboolean clipDestCoords(
@@ -30,6 +30,11 @@
 #include "VKAllocator.h"
 #include "VKBuffer.h"
 #include "VKDevice.h"
+#include "VKRenderer.h"
+
+#define VK_BUFFER_HOST_COHERENT_MEMORY
+
+const size_t VK_BUFFER_CREATE_THRESHOLD = 0xDC000;
 
 static VKMemory VKBuffer_DestroyBuffersOnFailure(VKDevice* device, VKMemory page, uint32_t bufferCount, VKBuffer* buffers) {
     assert(device != NULL && device->allocator != NULL);
@@ -220,41 +225,82 @@ VKBuffer* VKBuffer_Create(VKDevice* device, VkDeviceSize size,
         VKBuffer_Destroy(device, buffer);
         return NULL;
     }
+    buffer->lastStage = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
+    buffer->lastAccess = 0;
     return buffer;
 }
 
-VKBuffer* VKBuffer_CreateFromData(VKDevice* device, void* vertices, VkDeviceSize bufferSize)
+void VKBuffer_Dispose(VKDevice* device, void* data) {
+    VKBuffer* buffer = (VKBuffer*) data;
+    VKBuffer_Destroy(device, buffer);
+}
+
+VKBuffer *VKBuffer_CreateFromRaster(VKDevice *device,
+                                    VKBuffer_RasterInfo info,
+                                    VkPipelineStageFlags stage,
+                                    VkAccessFlags access)
 {
-    VKBuffer* buffer = VKBuffer_Create(device, bufferSize,
-                                       VK_BUFFER_USAGE_TRANSFER_DST_BIT | VK_BUFFER_USAGE_VERTEX_BUFFER_BIT | VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
-                                       VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
-                                       VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);
+    uint32_t dataSize = info.w * info.h * info.pixelStride;
+    VKBuffer *buffer = VKBuffer_Create(device, dataSize,
+                                       VK_BUFFER_USAGE_TRANSFER_DST_BIT |
+                                       VK_BUFFER_USAGE_VERTEX_BUFFER_BIT |
+                                       VK_BUFFER_USAGE_TRANSFER_SRC_BIT,
+                                       VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
+                                       VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT
+#ifdef VK_BUFFER_HOST_COHERENT_MEMORY
+                                       | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT
+#endif
+                                       );
 
     void* data;
     VK_IF_ERROR(device->vkMapMemory(device->handle, buffer->range.memory, 0, VK_WHOLE_SIZE, 0, &data)) {
         VKBuffer_Destroy(device, buffer);
         return NULL;
     }
-    memcpy(data, vertices, bufferSize);
-
-    VkMappedMemoryRange memoryRange = {
-            .sType = VK_STRUCTURE_TYPE_MAPPED_MEMORY_RANGE,
-            .pNext = NULL,
-            .memory = buffer->range.memory,
-            .offset = 0,
-            .size = VK_WHOLE_SIZE
-    };
+    char* raster = (char*)info.data + info.y1 * info.scanStride + info.x1 * info.pixelStride;
 
-    VK_IF_ERROR(device->vkFlushMappedMemoryRanges(device->handle, 1, &memoryRange)) {
-        VKBuffer_Destroy(device, buffer);
-        return NULL;
+    // copy src pixels inside src bounds to buff
+    for (size_t row = 0; row < info.h; row++) {
+        memcpy((char*)data + (row * info.w * info.pixelStride), raster, info.w * info.pixelStride);
+        raster += (uint32_t) info.scanStride;
     }
-    device->vkUnmapMemory(device->handle, buffer->range.memory);
+
+#ifndef VK_BUFFER_HOST_COHERENT_MEMORY
+    device->vkFlushMappedMemoryRanges(device->handle, 1, &buffer->range);
+#endif
+
+    device->vkUnmapMemory(device->handle, buffer->range.memory);
+    {
+        VkCommandBuffer cb = VKRenderer_Record(device->renderer);
+        VkBufferMemoryBarrier barrier;
+        VKBarrierBatch barrierBatch = {};
+        VKRenderer_AddBufferBarrier(&barrier, &barrierBatch, buffer,
+                                    stage, access);
+
+        if (barrierBatch.barrierCount > 0) {
+            device->vkCmdPipelineBarrier(cb, barrierBatch.srcStages,
+                                         barrierBatch.dstStages,
+                                         0, 0, NULL,
+                                         barrierBatch.barrierCount, &barrier,
+                                         0, NULL);
+        }
+    }
+
     return buffer;
 }
 
+VKBuffer* VKBuffer_CreateFromData(VKDevice* device, void* data, VkDeviceSize dataSize,
+                                  VkPipelineStageFlags stage, VkAccessFlags access) {
+    return VKBuffer_CreateFromRaster(device, (VKBuffer_RasterInfo) {
+        .data = data,
+        .w = dataSize,
+        .h = 1,
+        .scanStride = dataSize,
+        .pixelStride = 1
+    }, stage, access);
+}
+
 void VKBuffer_Destroy(VKDevice* device, VKBuffer* buffer) {
     if (buffer != NULL) {
         if (buffer->handle != VK_NULL_HANDLE) {
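For reference, a standalone sketch of the single-copy upload that VKBuffer_CreateFromRaster implements above. In the old path pixels were copied raster -> malloc'd staging block -> mapped Vulkan memory; the new path copies rows from the strided raster directly into the mapped destination. Here malloc stands in for vkMapMemory'd buffer memory:

/* Sketch only: RasterInfo mirrors VKBuffer_RasterInfo; 'mapped' models
 * the vkMapMemory'd destination. */
#include <stdint.h>
#include <stdlib.h>
#include <string.h>

typedef struct {
    void*  data;         /* base pointer of the source raster */
    size_t x1, y1, w, h; /* copy region, in pixels            */
    size_t scanStride;   /* bytes per source row              */
    size_t pixelStride;  /* bytes per pixel                   */
} RasterInfo;

/* Copy the w*h region row by row into tightly packed 'dst' (one copy total). */
static void copy_raster(void* dst, RasterInfo info) {
    const char* src = (const char*) info.data
                    + info.y1 * info.scanStride
                    + info.x1 * info.pixelStride;
    size_t rowBytes = info.w * info.pixelStride;
    for (size_t row = 0; row < info.h; row++) {
        memcpy((char*) dst + row * rowBytes, src, rowBytes);
        src += info.scanStride; /* advance one full source row */
    }
}

int main(void) {
    /* 4x2 region at (1,1) of an 8-pixel-wide 32-bit raster. */
    uint32_t raster[8 * 4] = {0};
    RasterInfo info = { raster, 1, 1, 4, 2, 8 * sizeof(uint32_t), sizeof(uint32_t) };
    void* mapped = malloc(info.w * info.h * info.pixelStride); /* stand-in for mapped memory */
    copy_raster(mapped, info);
    free(mapped);
    return 0;
}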
@@ -30,10 +30,13 @@
 #include "VKTypes.h"
 
 #define ARRAY_TO_VERTEX_BUF(device, vertices) \
-    VKBuffer_CreateFromData(device, vertices, ARRAY_SIZE(vertices)*sizeof (vertices[0]))
+    VKBuffer_CreateFromData(device, vertices, ARRAY_SIZE(vertices)*sizeof (vertices[0]),\
+                            VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT)
 
 struct VKBuffer {
     VkBuffer handle;
+    VkPipelineStageFlagBits lastStage;
+    VkAccessFlagBits lastAccess;
     // Buffer has no ownership over its memory.
     // Provided memory, offset and size must only be used to flush memory writes.
     // Allocation and freeing is done in pages.
@@ -48,6 +51,13 @@ struct VKTexelBuffer {
     VkDescriptorSet descriptorSet;
 };
 
+typedef struct {
+    void* data;
+    size_t x1, y1, w, h;
+    size_t scanStride;
+    size_t pixelStride;
+} VKBuffer_RasterInfo;
+
 /**
  * Create buffers, allocate a memory page and bind them together.
  * 'pageSize' can be 0, meaning that page size is calculated based on buffer memory requirements.
@@ -75,9 +85,15 @@ VKBuffer* VKBuffer_Create(VKDevice* device, VkDeviceSize size,
                          VkBufferUsageFlags usage, VkMemoryPropertyFlags properties);
 
 // TODO usage of this function is suboptimal, we need to avoid creating one-time buffers.
-VKBuffer* VKBuffer_CreateFromData(VKDevice* device, void* vertices, VkDeviceSize bufferSize);
+VKBuffer* VKBuffer_CreateFromData(VKDevice* device, void* data, VkDeviceSize dataSize,
+                                  VkPipelineStageFlags stage, VkAccessFlags access);
+
+VKBuffer* VKBuffer_CreateFromRaster(VKDevice* device, VKBuffer_RasterInfo info,
+                                    VkPipelineStageFlags stage, VkAccessFlags access);
 
 // TODO usage of this function is suboptimal, we need to avoid destroying individual buffers.
 void VKBuffer_Destroy(VKDevice* device, VKBuffer* buffer);
 
+void VKBuffer_Dispose(VKDevice* device, void* ctx);
+
 #endif // VKBuffer_h_Included
@@ -53,6 +53,12 @@ RING_BUFFER(struct PoolEntry_ ## NAME { \
     (VAR) = RING_BUFFER_FRONT((RENDERER)->NAME)->value; RING_BUFFER_POP_FRONT((RENDERER)->NAME); \
 }} while(0)
 
+/**
+ * Check if there are available items in the pool.
+ */
+#define POOL_NOT_EMPTY(RENDERER, NAME) \
+    (VKRenderer_CheckPoolEntryAvailable((RENDERER), RING_BUFFER_FRONT((RENDERER)->NAME)))
+
 /**
  * Return an item to the pool. It will only become available again
  * after the next submitted batch of work completes execution on GPU.
@@ -83,6 +89,11 @@ RING_BUFFER(struct PoolEntry_ ## NAME { \
  */
 #define POOL_FREE(RENDERER, NAME) RING_BUFFER_FREE((RENDERER)->NAME)
 
+typedef struct {
+    VKCleanupHandler handler;
+    void* data;
+} VKCleanupEntry;
+
 /**
  * Renderer attached to device.
  */
@@ -96,6 +107,7 @@ struct VKRenderer {
     POOL(VKBuffer, vertexBufferPool);
     POOL(VKTexelBuffer, maskFillBufferPool);
     POOL(VkFramebuffer, framebufferDestructionQueue);
+    POOL(VKCleanupEntry, cleanupQueue);
     ARRAY(VKMemory) bufferMemoryPages;
     ARRAY(VkDescriptorPool) descriptorPools;
     ARRAY(VkDescriptorPool) imageDescriptorPools;
@@ -436,6 +448,13 @@ void VKRenderer_Destroy(VKRenderer* renderer) {
 
 static void VKRenderer_CleanupPendingResources(VKRenderer* renderer) {
     VKDevice* device = renderer->device;
+
+    while (POOL_NOT_EMPTY(renderer, cleanupQueue)) {
+        VKCleanupEntry entry;
+        POOL_TAKE(renderer, cleanupQueue, entry);
+        entry.handler(device, entry.data);
+    }
+
     for (;;) {
         VkFramebuffer framebuffer = VK_NULL_HANDLE;
         POOL_TAKE(renderer, framebufferDestructionQueue, framebuffer);
@@ -579,6 +598,33 @@ void VKRenderer_AddImageBarrier(VkImageMemoryBarrier* barriers, VKBarrierBatch*
     }
 }
 
+/**
+ * Prepare buffer barrier info to be executed in batch, if needed.
+ */
+void VKRenderer_AddBufferBarrier(VkBufferMemoryBarrier* barriers, VKBarrierBatch* batch,
+                                 VKBuffer* buffer, VkPipelineStageFlags stage,
+                                 VkAccessFlags access)
+{
+    assert(barriers != NULL && batch != NULL && buffer != NULL);
+    if (stage != buffer->lastStage || access != buffer->lastAccess) {
+        barriers[batch->barrierCount] = (VkBufferMemoryBarrier) {
+                .sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,
+                .srcAccessMask = buffer->lastAccess,
+                .dstAccessMask = access,
+                .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED,
+                .buffer = buffer->handle,
+                .offset = 0,
+                .size = VK_WHOLE_SIZE
+        };
+        batch->barrierCount++;
+        batch->srcStages |= buffer->lastStage;
+        batch->dstStages |= stage;
+        buffer->lastStage = stage;
+        buffer->lastAccess = access;
+    }
+}
+
 /**
  * Get Color RGBA components in a format suitable for the current render pass.
 */
@@ -1178,6 +1224,12 @@ static void VKRenderer_SetupStencil(const VKRenderingContext* context) {
     renderPass->state.shader = NO_SHADER;
 }
 
+void VKRenderer_DisposeOnCleanup(VKRenderer* renderer, VKCleanupHandler hnd, void* data) {
+    if (renderer == NULL) return;
+    VKCleanupEntry entry = {hnd, data};
+    POOL_RETURN(renderer, cleanupQueue, entry);
+}
+
 /**
  * Setup pipeline for drawing. Returns FALSE if surface is not yet ready for drawing.
  */
@@ -61,6 +61,8 @@ typedef struct {
     VkPipelineStageFlags dstStages;
 } VKBarrierBatch;
 
+typedef void (*VKCleanupHandler)(VKDevice *renderer, void* data);
+
 VKRenderer* VKRenderer_Create(VKDevice* device);
 
 /**
@@ -80,6 +82,10 @@ VkCommandBuffer VKRenderer_Record(VKRenderer* renderer);
 void VKRenderer_AddImageBarrier(VkImageMemoryBarrier* barriers, VKBarrierBatch* batch,
                                 VKImage* image, VkPipelineStageFlags stage, VkAccessFlags access, VkImageLayout layout);
 
+void VKRenderer_AddBufferBarrier(VkBufferMemoryBarrier* barriers, VKBarrierBatch* batch,
+                                 VKBuffer* buffer, VkPipelineStageFlags stage,
+                                 VkAccessFlags access);
+
 void VKRenderer_CreateImageDescriptorSet(VKRenderer* renderer, VkDescriptorPool* descriptorPool, VkDescriptorSet* set);
 
 void VKRenderer_Destroy(VKRenderer* renderer);
@@ -105,6 +111,11 @@ void VKRenderer_DestroyRenderPass(VKSDOps* surface);
 */
 VkBool32 VKRenderer_FlushRenderPass(VKSDOps* surface);
 
+/**
+ * Register a handler to be called at the cleanup phase of the renderer.
+ */
+void VKRenderer_DisposeOnCleanup(VKRenderer* renderer, VKCleanupHandler hnd, void* data);
+
 /**
  * Flush pending render pass and queue surface for presentation (if applicable).
  */
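Finally, a standalone model of the barrier-batching logic added in VKRenderer_AddBufferBarrier above (plain integers stand in for the Vulkan flag enums): a barrier is recorded only when the requested stage/access pair differs from the buffer's last recorded usage, and batched barriers share a single vkCmdPipelineBarrier call by OR-ing their stage masks.

/* Sketch only: Buffer/BarrierBatch model VKBuffer's lastStage/lastAccess
 * tracking and VKBarrierBatch; flag values are illustrative. */
#include <stdio.h>

typedef unsigned int Flags;

typedef struct {
    Flags lastStage;
    Flags lastAccess;
} Buffer;

typedef struct {
    int   barrierCount;
    Flags srcStages;
    Flags dstStages;
} BarrierBatch;

static void add_buffer_barrier(BarrierBatch* batch, Buffer* buffer,
                               Flags stage, Flags access) {
    if (stage != buffer->lastStage || access != buffer->lastAccess) {
        batch->barrierCount++;               /* one more barrier in the batch */
        batch->srcStages |= buffer->lastStage;
        batch->dstStages |= stage;
        buffer->lastStage  = stage;          /* remember the new usage        */
        buffer->lastAccess = access;
    }
    /* else: same usage as last time, no barrier needed */
}

int main(void) {
    Buffer buf = { 0x1 /* TOP_OF_PIPE */, 0 };
    BarrierBatch batch = {0};
    add_buffer_barrier(&batch, &buf, 0x1000 /* TRANSFER */, 0x800 /* TRANSFER_READ */);
    add_buffer_barrier(&batch, &buf, 0x1000, 0x800); /* redundant: skipped */
    printf("barriers: %d, src=0x%x, dst=0x%x\n",
           batch.barrierCount, batch.srcStages, batch.dstStages);
    return 0;
}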