OpenGL: Make use of persistent buffer maps in buffer cache downloads

Persistent buffer maps were already used by the texture cache, this extends their usage for the buffer cache.

In my testing, using the memory maps for uploads was slower than the existing "ImmediateUpload" path, so the memory map usage is limited to downloads for the time being.
This commit is contained in:
ameerj 2023-05-27 21:46:15 -04:00
parent d6db422098
commit 642c14f0c7
15 changed files with 298 additions and 204 deletions

View file

@ -133,8 +133,8 @@ add_library(video_core STATIC
renderer_opengl/gl_shader_util.h renderer_opengl/gl_shader_util.h
renderer_opengl/gl_state_tracker.cpp renderer_opengl/gl_state_tracker.cpp
renderer_opengl/gl_state_tracker.h renderer_opengl/gl_state_tracker.h
renderer_opengl/gl_stream_buffer.cpp renderer_opengl/gl_staging_buffer_pool.cpp
renderer_opengl/gl_stream_buffer.h renderer_opengl/gl_staging_buffer_pool.h
renderer_opengl/gl_texture_cache.cpp renderer_opengl/gl_texture_cache.cpp
renderer_opengl/gl_texture_cache.h renderer_opengl/gl_texture_cache.h
renderer_opengl/gl_texture_cache_base.cpp renderer_opengl/gl_texture_cache_base.cpp

View file

@ -465,7 +465,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
if (committed_ranges.empty()) { if (committed_ranges.empty()) {
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
async_buffers.emplace_back(std::optional<Async_Buffer>{}); async_buffers.emplace_back(std::optional<Async_Buffer>{});
} }
return; return;
@ -526,7 +525,6 @@ void BufferCache<P>::CommitAsyncFlushesHigh() {
committed_ranges.clear(); committed_ranges.clear();
if (downloads.empty()) { if (downloads.empty()) {
if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) { if constexpr (IMPLEMENTS_ASYNC_DOWNLOADS) {
async_buffers.emplace_back(std::optional<Async_Buffer>{}); async_buffers.emplace_back(std::optional<Async_Buffer>{});
} }
return; return;
@ -678,7 +676,7 @@ void BufferCache<P>::BindHostIndexBuffer() {
const u32 size = index_buffer.size; const u32 size = index_buffer.size;
const auto& draw_state = maxwell3d->draw_manager->GetDrawState(); const auto& draw_state = maxwell3d->draw_manager->GetDrawState();
if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] { if (!draw_state.inline_index_draw_indexes.empty()) [[unlikely]] {
if constexpr (USE_MEMORY_MAPS) { if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
auto upload_staging = runtime.UploadStagingBuffer(size); auto upload_staging = runtime.UploadStagingBuffer(size);
std::array<BufferCopy, 1> copies{ std::array<BufferCopy, 1> copies{
{BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}}; {BufferCopy{.src_offset = upload_staging.offset, .dst_offset = 0, .size = size}}};
@ -1446,7 +1444,7 @@ bool BufferCache<P>::SynchronizeBufferNoModified(Buffer& buffer, VAddr cpu_addr,
template <class P> template <class P>
void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy, void BufferCache<P>::UploadMemory(Buffer& buffer, u64 total_size_bytes, u64 largest_copy,
std::span<BufferCopy> copies) { std::span<BufferCopy> copies) {
if constexpr (USE_MEMORY_MAPS) { if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
MappedUploadMemory(buffer, total_size_bytes, copies); MappedUploadMemory(buffer, total_size_bytes, copies);
} else { } else {
ImmediateUploadMemory(buffer, largest_copy, copies); ImmediateUploadMemory(buffer, largest_copy, copies);
@ -1457,7 +1455,7 @@ template <class P>
void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer, void BufferCache<P>::ImmediateUploadMemory([[maybe_unused]] Buffer& buffer,
[[maybe_unused]] u64 largest_copy, [[maybe_unused]] u64 largest_copy,
[[maybe_unused]] std::span<const BufferCopy> copies) { [[maybe_unused]] std::span<const BufferCopy> copies) {
if constexpr (!USE_MEMORY_MAPS) { if constexpr (!USE_MEMORY_MAPS_FOR_UPLOADS) {
std::span<u8> immediate_buffer; std::span<u8> immediate_buffer;
for (const BufferCopy& copy : copies) { for (const BufferCopy& copy : copies) {
std::span<const u8> upload_span; std::span<const u8> upload_span;
@ -1516,7 +1514,7 @@ bool BufferCache<P>::InlineMemory(VAddr dest_address, size_t copy_size,
auto& buffer = slot_buffers[buffer_id]; auto& buffer = slot_buffers[buffer_id];
SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size)); SynchronizeBuffer(buffer, dest_address, static_cast<u32>(copy_size));
if constexpr (USE_MEMORY_MAPS) { if constexpr (USE_MEMORY_MAPS_FOR_UPLOADS) {
auto upload_staging = runtime.UploadStagingBuffer(copy_size); auto upload_staging = runtime.UploadStagingBuffer(copy_size);
std::array copies{BufferCopy{ std::array copies{BufferCopy{
.src_offset = upload_staging.offset, .src_offset = upload_staging.offset,

View file

@ -103,6 +103,7 @@ class BufferCache : public VideoCommon::ChannelSetupCaches<VideoCommon::ChannelI
static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS; static constexpr bool USE_MEMORY_MAPS = P::USE_MEMORY_MAPS;
static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS; static constexpr bool SEPARATE_IMAGE_BUFFERS_BINDINGS = P::SEPARATE_IMAGE_BUFFER_BINDINGS;
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS; static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = P::IMPLEMENTS_ASYNC_DOWNLOADS;
static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = P::USE_MEMORY_MAPS_FOR_UPLOADS;
static constexpr BufferId NULL_BUFFER_ID{0}; static constexpr BufferId NULL_BUFFER_ID{0};

View file

@ -106,8 +106,10 @@ GLuint Buffer::View(u32 offset, u32 size, PixelFormat format) {
return views.back().texture.handle; return views.back().texture.handle;
} }
BufferCacheRuntime::BufferCacheRuntime(const Device& device_) BufferCacheRuntime::BufferCacheRuntime(const Device& device_,
: device{device_}, has_fast_buffer_sub_data{device.HasFastBufferSubData()}, StagingBufferPool& staging_buffer_pool_)
: device{device_}, staging_buffer_pool{staging_buffer_pool_},
has_fast_buffer_sub_data{device.HasFastBufferSubData()},
use_assembly_shaders{device.UseAssemblyShaders()}, use_assembly_shaders{device.UseAssemblyShaders()},
has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()}, has_unified_vertex_buffers{device.HasVertexBufferUnifiedMemory()},
stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} { stream_buffer{has_fast_buffer_sub_data ? std::nullopt : std::make_optional<StreamBuffer>()} {
@ -140,6 +142,14 @@ BufferCacheRuntime::BufferCacheRuntime(const Device& device_)
}(); }();
} }
StagingBufferMap BufferCacheRuntime::UploadStagingBuffer(size_t size) {
return staging_buffer_pool.RequestUploadBuffer(size);
}
StagingBufferMap BufferCacheRuntime::DownloadStagingBuffer(size_t size) {
return staging_buffer_pool.RequestDownloadBuffer(size);
}
u64 BufferCacheRuntime::GetDeviceMemoryUsage() const { u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
if (device.CanReportMemoryUsage()) { if (device.CanReportMemoryUsage()) {
return device_access_memory - device.GetCurrentDedicatedVideoMemory(); return device_access_memory - device.GetCurrentDedicatedVideoMemory();
@ -147,13 +157,47 @@ u64 BufferCacheRuntime::GetDeviceMemoryUsage() const {
return 2_GiB; return 2_GiB;
} }
void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
if (barrier) {
PreCopyBarrier();
}
for (const VideoCommon::BufferCopy& copy : copies) {
glCopyNamedBufferSubData(src_buffer, dst_buffer, static_cast<GLintptr>(copy.src_offset),
static_cast<GLintptr>(copy.dst_offset),
static_cast<GLsizeiptr>(copy.size));
}
if (barrier) {
PostCopyBarrier();
}
}
void BufferCacheRuntime::CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
CopyBuffer(dst_buffer, src_buffer.Handle(), copies, barrier);
}
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier) {
CopyBuffer(dst_buffer.Handle(), src_buffer, copies, barrier);
}
void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, void BufferCacheRuntime::CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies) { std::span<const VideoCommon::BufferCopy> copies) {
for (const VideoCommon::BufferCopy& copy : copies) { CopyBuffer(dst_buffer.Handle(), src_buffer.Handle(), copies);
glCopyNamedBufferSubData(
src_buffer.Handle(), dst_buffer.Handle(), static_cast<GLintptr>(copy.src_offset),
static_cast<GLintptr>(copy.dst_offset), static_cast<GLsizeiptr>(copy.size));
} }
void BufferCacheRuntime::PreCopyBarrier() {
// TODO: finer grained barrier?
glMemoryBarrier(GL_ALL_BARRIER_BITS);
}
void BufferCacheRuntime::PostCopyBarrier() {
glMemoryBarrier(GL_BUFFER_UPDATE_BARRIER_BIT | GL_CLIENT_MAPPED_BUFFER_BARRIER_BIT);
}
void BufferCacheRuntime::Finish() {
glFinish();
} }
void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) { void BufferCacheRuntime::ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value) {

View file

@ -12,7 +12,7 @@
#include "video_core/rasterizer_interface.h" #include "video_core/rasterizer_interface.h"
#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h" #include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
namespace OpenGL { namespace OpenGL {
@ -60,11 +60,28 @@ class BufferCacheRuntime {
public: public:
static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max(); static constexpr u8 INVALID_BINDING = std::numeric_limits<u8>::max();
explicit BufferCacheRuntime(const Device& device_); explicit BufferCacheRuntime(const Device& device_, StagingBufferPool& staging_buffer_pool_);
[[nodiscard]] StagingBufferMap UploadStagingBuffer(size_t size);
[[nodiscard]] StagingBufferMap DownloadStagingBuffer(size_t size);
void CopyBuffer(GLuint dst_buffer, GLuint src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
void CopyBuffer(GLuint dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
void CopyBuffer(Buffer& dst_buffer, GLuint src_buffer,
std::span<const VideoCommon::BufferCopy> copies, bool barrier = true);
void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer, void CopyBuffer(Buffer& dst_buffer, Buffer& src_buffer,
std::span<const VideoCommon::BufferCopy> copies); std::span<const VideoCommon::BufferCopy> copies);
void PreCopyBarrier();
void PostCopyBarrier();
void Finish();
void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value); void ClearBuffer(Buffer& dest_buffer, u32 offset, size_t size, u32 value);
void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size); void BindIndexBuffer(Buffer& buffer, u32 offset, u32 size);
@ -169,6 +186,7 @@ private:
}; };
const Device& device; const Device& device;
StagingBufferPool& staging_buffer_pool;
bool has_fast_buffer_sub_data = false; bool has_fast_buffer_sub_data = false;
bool use_assembly_shaders = false; bool use_assembly_shaders = false;
@ -201,7 +219,7 @@ private:
struct BufferCacheParams { struct BufferCacheParams {
using Runtime = OpenGL::BufferCacheRuntime; using Runtime = OpenGL::BufferCacheRuntime;
using Buffer = OpenGL::Buffer; using Buffer = OpenGL::Buffer;
using Async_Buffer = u32; using Async_Buffer = OpenGL::StagingBufferMap;
using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>; using MemoryTracker = VideoCommon::MemoryTrackerBase<VideoCore::RasterizerInterface>;
static constexpr bool IS_OPENGL = true; static constexpr bool IS_OPENGL = true;
@ -209,9 +227,12 @@ struct BufferCacheParams {
static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true; static constexpr bool HAS_FULL_INDEX_AND_PRIMITIVE_SUPPORT = true;
static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true; static constexpr bool NEEDS_BIND_UNIFORM_INDEX = true;
static constexpr bool NEEDS_BIND_STORAGE_INDEX = true; static constexpr bool NEEDS_BIND_STORAGE_INDEX = true;
static constexpr bool USE_MEMORY_MAPS = false; static constexpr bool USE_MEMORY_MAPS = true;
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true; static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = true;
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false; static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = false;
// TODO: Investigate why OpenGL seems to perform worse with persistently mapped buffer uploads
static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = false;
}; };
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;

View file

@ -24,6 +24,7 @@
#include "video_core/renderer_opengl/gl_query_cache.h" #include "video_core/renderer_opengl/gl_query_cache.h"
#include "video_core/renderer_opengl/gl_rasterizer.h" #include "video_core/renderer_opengl/gl_rasterizer.h"
#include "video_core/renderer_opengl/gl_shader_cache.h" #include "video_core/renderer_opengl/gl_shader_cache.h"
#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
#include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/maxwell_to_gl.h" #include "video_core/renderer_opengl/maxwell_to_gl.h"
#include "video_core/renderer_opengl/renderer_opengl.h" #include "video_core/renderer_opengl/renderer_opengl.h"
@ -58,8 +59,9 @@ RasterizerOpenGL::RasterizerOpenGL(Core::Frontend::EmuWindow& emu_window_, Tegra
StateTracker& state_tracker_) StateTracker& state_tracker_)
: RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_), : RasterizerAccelerated(cpu_memory_), gpu(gpu_), device(device_), screen_info(screen_info_),
program_manager(program_manager_), state_tracker(state_tracker_), program_manager(program_manager_), state_tracker(state_tracker_),
texture_cache_runtime(device, program_manager, state_tracker), texture_cache_runtime(device, program_manager, state_tracker, staging_buffer_pool),
texture_cache(texture_cache_runtime, *this), buffer_cache_runtime(device), texture_cache(texture_cache_runtime, *this),
buffer_cache_runtime(device, staging_buffer_pool),
buffer_cache(*this, cpu_memory_, buffer_cache_runtime), buffer_cache(*this, cpu_memory_, buffer_cache_runtime),
shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager, shader_cache(*this, emu_window_, device, texture_cache, buffer_cache, program_manager,
state_tracker, gpu.ShaderNotify()), state_tracker, gpu.ShaderNotify()),

View file

@ -230,6 +230,7 @@ private:
ProgramManager& program_manager; ProgramManager& program_manager;
StateTracker& state_tracker; StateTracker& state_tracker;
StagingBufferPool staging_buffer_pool;
TextureCacheRuntime texture_cache_runtime; TextureCacheRuntime texture_cache_runtime;
TextureCache texture_cache; TextureCache texture_cache;
BufferCacheRuntime buffer_cache_runtime; BufferCacheRuntime buffer_cache_runtime;

View file

@ -0,0 +1,134 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include <memory>
#include <span>
#include <glad/glad.h>
#include "common/alignment.h"
#include "common/assert.h"
#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
namespace OpenGL {
StagingBufferMap::~StagingBufferMap() {
if (sync) {
sync->Create();
}
}
StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
: storage_flags{storage_flags_}, map_flags{map_flags_} {}
StagingBuffers::~StagingBuffers() = default;
StagingBufferMap StagingBuffers::RequestMap(size_t requested_size, bool insert_fence) {
const size_t index = RequestBuffer(requested_size);
OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
return StagingBufferMap{
.mapped_span = std::span(maps[index], requested_size),
.sync = sync,
.buffer = buffers[index].handle,
};
}
size_t StagingBuffers::RequestBuffer(size_t requested_size) {
if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
return *index;
}
OGLBuffer& buffer = buffers.emplace_back();
buffer.Create();
glNamedBufferStorage(buffer.handle, requested_size, nullptr,
storage_flags | GL_MAP_PERSISTENT_BIT);
maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size,
map_flags | GL_MAP_PERSISTENT_BIT)));
syncs.emplace_back();
sizes.push_back(requested_size);
ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
maps.size() == sizes.size());
return buffers.size() - 1;
}
std::optional<size_t> StagingBuffers::FindBuffer(size_t requested_size) {
size_t smallest_buffer = std::numeric_limits<size_t>::max();
std::optional<size_t> found;
const size_t num_buffers = sizes.size();
for (size_t index = 0; index < num_buffers; ++index) {
const size_t buffer_size = sizes[index];
if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
continue;
}
if (syncs[index].handle != 0) {
if (!syncs[index].IsSignaled()) {
continue;
}
syncs[index].Release();
}
smallest_buffer = buffer_size;
found = index;
}
return found;
}
StreamBuffer::StreamBuffer() {
static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
buffer.Create();
glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
mapped_pointer =
static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
for (OGLSync& sync : fences) {
sync.Create();
}
}
std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
ASSERT(size < REGION_SIZE);
for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
++region) {
fences[region].Create();
}
used_iterator = iterator;
for (size_t region = Region(free_iterator) + 1,
region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
region < region_end; ++region) {
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
fences[region].Release();
}
if (iterator + size >= free_iterator) {
free_iterator = iterator + size;
}
if (iterator + size > STREAM_BUFFER_SIZE) {
for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
fences[region].Create();
}
used_iterator = 0;
iterator = 0;
free_iterator = size;
for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
fences[region].Release();
}
}
const size_t offset = iterator;
iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
return {std::span(mapped_pointer + offset, size), offset};
}
StagingBufferMap StagingBufferPool::RequestUploadBuffer(size_t size) {
return upload_buffers.RequestMap(size, true);
}
StagingBufferMap StagingBufferPool::RequestDownloadBuffer(size_t size) {
return download_buffers.RequestMap(size, false);
}
} // namespace OpenGL

View file

@ -4,8 +4,10 @@
#pragma once #pragma once
#include <array> #include <array>
#include <optional>
#include <span> #include <span>
#include <utility> #include <utility>
#include <vector>
#include <glad/glad.h> #include <glad/glad.h>
@ -17,6 +19,33 @@ namespace OpenGL {
using namespace Common::Literals; using namespace Common::Literals;
struct StagingBufferMap {
~StagingBufferMap();
std::span<u8> mapped_span;
size_t offset = 0;
OGLSync* sync;
GLuint buffer;
};
struct StagingBuffers {
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
~StagingBuffers();
StagingBufferMap RequestMap(size_t requested_size, bool insert_fence);
size_t RequestBuffer(size_t requested_size);
std::optional<size_t> FindBuffer(size_t requested_size);
std::vector<OGLSync> syncs;
std::vector<OGLBuffer> buffers;
std::vector<u8*> maps;
std::vector<size_t> sizes;
GLenum storage_flags;
GLenum map_flags;
};
class StreamBuffer { class StreamBuffer {
static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB; static constexpr size_t STREAM_BUFFER_SIZE = 64_MiB;
static constexpr size_t NUM_SYNCS = 16; static constexpr size_t NUM_SYNCS = 16;
@ -48,4 +77,17 @@ private:
std::array<OGLSync, NUM_SYNCS> fences; std::array<OGLSync, NUM_SYNCS> fences;
}; };
class StagingBufferPool {
public:
StagingBufferPool() = default;
~StagingBufferPool() = default;
StagingBufferMap RequestUploadBuffer(size_t size);
StagingBufferMap RequestDownloadBuffer(size_t size);
private:
StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT};
};
} // namespace OpenGL } // namespace OpenGL

View file

@ -1,63 +0,0 @@
// SPDX-FileCopyrightText: Copyright 2021 yuzu Emulator Project
// SPDX-License-Identifier: GPL-2.0-or-later
#include <array>
#include <memory>
#include <span>
#include <glad/glad.h>
#include "common/alignment.h"
#include "common/assert.h"
#include "video_core/renderer_opengl/gl_stream_buffer.h"
namespace OpenGL {
StreamBuffer::StreamBuffer() {
static constexpr GLenum flags = GL_MAP_WRITE_BIT | GL_MAP_PERSISTENT_BIT | GL_MAP_COHERENT_BIT;
buffer.Create();
glObjectLabel(GL_BUFFER, buffer.handle, -1, "Stream Buffer");
glNamedBufferStorage(buffer.handle, STREAM_BUFFER_SIZE, nullptr, flags);
mapped_pointer =
static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, STREAM_BUFFER_SIZE, flags));
for (OGLSync& sync : fences) {
sync.Create();
}
}
std::pair<std::span<u8>, size_t> StreamBuffer::Request(size_t size) noexcept {
ASSERT(size < REGION_SIZE);
for (size_t region = Region(used_iterator), region_end = Region(iterator); region < region_end;
++region) {
fences[region].Create();
}
used_iterator = iterator;
for (size_t region = Region(free_iterator) + 1,
region_end = std::min(Region(iterator + size) + 1, NUM_SYNCS);
region < region_end; ++region) {
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
fences[region].Release();
}
if (iterator + size >= free_iterator) {
free_iterator = iterator + size;
}
if (iterator + size > STREAM_BUFFER_SIZE) {
for (size_t region = Region(used_iterator); region < NUM_SYNCS; ++region) {
fences[region].Create();
}
used_iterator = 0;
iterator = 0;
free_iterator = size;
for (size_t region = 0, region_end = Region(size); region <= region_end; ++region) {
glClientWaitSync(fences[region].handle, 0, GL_TIMEOUT_IGNORED);
fences[region].Release();
}
}
const size_t offset = iterator;
iterator = Common::AlignUp(iterator + size, MAX_ALIGNMENT);
return {std::span(mapped_pointer + offset, size), offset};
}
} // namespace OpenGL

View file

@ -451,19 +451,14 @@ OGLTexture MakeImage(const VideoCommon::ImageInfo& info, GLenum gl_internal_form
return is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8; return is_srgb ? GL_SRGB8_ALPHA8 : GL_RGBA8;
} }
} }
} // Anonymous namespace } // Anonymous namespace
ImageBufferMap::~ImageBufferMap() {
if (sync) {
sync->Create();
}
}
TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager, TextureCacheRuntime::TextureCacheRuntime(const Device& device_, ProgramManager& program_manager,
StateTracker& state_tracker_) StateTracker& state_tracker_,
: device{device_}, state_tracker{state_tracker_}, util_shaders(program_manager), StagingBufferPool& staging_buffer_pool_)
format_conversion_pass{util_shaders}, resolution{Settings::values.resolution_info} { : device{device_}, state_tracker{state_tracker_}, staging_buffer_pool{staging_buffer_pool_},
util_shaders(program_manager), format_conversion_pass{util_shaders},
resolution{Settings::values.resolution_info} {
static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D}; static constexpr std::array TARGETS{GL_TEXTURE_1D_ARRAY, GL_TEXTURE_2D_ARRAY, GL_TEXTURE_3D};
for (size_t i = 0; i < TARGETS.size(); ++i) { for (size_t i = 0; i < TARGETS.size(); ++i) {
const GLenum target = TARGETS[i]; const GLenum target = TARGETS[i];
@ -553,12 +548,12 @@ void TextureCacheRuntime::Finish() {
glFinish(); glFinish();
} }
ImageBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) { StagingBufferMap TextureCacheRuntime::UploadStagingBuffer(size_t size) {
return upload_buffers.RequestMap(size, true); return staging_buffer_pool.RequestUploadBuffer(size);
} }
ImageBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) { StagingBufferMap TextureCacheRuntime::DownloadStagingBuffer(size_t size) {
return download_buffers.RequestMap(size, false); return staging_buffer_pool.RequestDownloadBuffer(size);
} }
u64 TextureCacheRuntime::GetDeviceMemoryUsage() const { u64 TextureCacheRuntime::GetDeviceMemoryUsage() const {
@ -643,7 +638,7 @@ void TextureCacheRuntime::BlitFramebuffer(Framebuffer* dst, Framebuffer* src,
is_linear ? GL_LINEAR : GL_NEAREST); is_linear ? GL_LINEAR : GL_NEAREST);
} }
void TextureCacheRuntime::AccelerateImageUpload(Image& image, const ImageBufferMap& map, void TextureCacheRuntime::AccelerateImageUpload(Image& image, const StagingBufferMap& map,
std::span<const SwizzleParameters> swizzles) { std::span<const SwizzleParameters> swizzles) {
switch (image.info.type) { switch (image.info.type) {
case ImageType::e2D: case ImageType::e2D:
@ -685,64 +680,6 @@ bool TextureCacheRuntime::HasNativeASTC() const noexcept {
return device.HasASTC(); return device.HasASTC();
} }
TextureCacheRuntime::StagingBuffers::StagingBuffers(GLenum storage_flags_, GLenum map_flags_)
: storage_flags{storage_flags_}, map_flags{map_flags_} {}
TextureCacheRuntime::StagingBuffers::~StagingBuffers() = default;
ImageBufferMap TextureCacheRuntime::StagingBuffers::RequestMap(size_t requested_size,
bool insert_fence) {
const size_t index = RequestBuffer(requested_size);
OGLSync* const sync = insert_fence ? &syncs[index] : nullptr;
return ImageBufferMap{
.mapped_span = std::span(maps[index], requested_size),
.sync = sync,
.buffer = buffers[index].handle,
};
}
size_t TextureCacheRuntime::StagingBuffers::RequestBuffer(size_t requested_size) {
if (const std::optional<size_t> index = FindBuffer(requested_size); index) {
return *index;
}
OGLBuffer& buffer = buffers.emplace_back();
buffer.Create();
glNamedBufferStorage(buffer.handle, requested_size, nullptr,
storage_flags | GL_MAP_PERSISTENT_BIT);
maps.push_back(static_cast<u8*>(glMapNamedBufferRange(buffer.handle, 0, requested_size,
map_flags | GL_MAP_PERSISTENT_BIT)));
syncs.emplace_back();
sizes.push_back(requested_size);
ASSERT(syncs.size() == buffers.size() && buffers.size() == maps.size() &&
maps.size() == sizes.size());
return buffers.size() - 1;
}
std::optional<size_t> TextureCacheRuntime::StagingBuffers::FindBuffer(size_t requested_size) {
size_t smallest_buffer = std::numeric_limits<size_t>::max();
std::optional<size_t> found;
const size_t num_buffers = sizes.size();
for (size_t index = 0; index < num_buffers; ++index) {
const size_t buffer_size = sizes[index];
if (buffer_size < requested_size || buffer_size >= smallest_buffer) {
continue;
}
if (syncs[index].handle != 0) {
if (!syncs[index].IsSignaled()) {
continue;
}
syncs[index].Release();
}
smallest_buffer = buffer_size;
found = index;
}
return found;
}
Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_, Image::Image(TextureCacheRuntime& runtime_, const VideoCommon::ImageInfo& info_, GPUVAddr gpu_addr_,
VAddr cpu_addr_) VAddr cpu_addr_)
: VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} { : VideoCommon::ImageBase(info_, gpu_addr_, cpu_addr_), runtime{&runtime_} {
@ -818,7 +755,7 @@ void Image::UploadMemory(GLuint buffer_handle, size_t buffer_offset,
} }
} }
void Image::UploadMemory(const ImageBufferMap& map, void Image::UploadMemory(const StagingBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) { std::span<const VideoCommon::BufferImageCopy> copies) {
UploadMemory(map.buffer, map.offset, copies); UploadMemory(map.buffer, map.offset, copies);
} }
@ -865,7 +802,7 @@ void Image::DownloadMemory(std::span<GLuint> buffer_handles, std::span<size_t> b
} }
} }
void Image::DownloadMemory(ImageBufferMap& map, void Image::DownloadMemory(StagingBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies) { std::span<const VideoCommon::BufferImageCopy> copies) {
DownloadMemory(map.buffer, map.offset, copies); DownloadMemory(map.buffer, map.offset, copies);
} }

View file

@ -11,6 +11,7 @@
#include "shader_recompiler/shader_info.h" #include "shader_recompiler/shader_info.h"
#include "video_core/renderer_opengl/gl_device.h" #include "video_core/renderer_opengl/gl_device.h"
#include "video_core/renderer_opengl/gl_resource_manager.h" #include "video_core/renderer_opengl/gl_resource_manager.h"
#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
#include "video_core/renderer_opengl/util_shaders.h" #include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/image_view_base.h" #include "video_core/texture_cache/image_view_base.h"
#include "video_core/texture_cache/texture_cache_base.h" #include "video_core/texture_cache/texture_cache_base.h"
@ -37,15 +38,6 @@ using VideoCommon::Region2D;
using VideoCommon::RenderTargets; using VideoCommon::RenderTargets;
using VideoCommon::SlotVector; using VideoCommon::SlotVector;
struct ImageBufferMap {
~ImageBufferMap();
std::span<u8> mapped_span;
size_t offset = 0;
OGLSync* sync;
GLuint buffer;
};
struct FormatProperties { struct FormatProperties {
GLenum compatibility_class; GLenum compatibility_class;
bool compatibility_by_size; bool compatibility_by_size;
@ -74,14 +66,15 @@ class TextureCacheRuntime {
public: public:
explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager, explicit TextureCacheRuntime(const Device& device, ProgramManager& program_manager,
StateTracker& state_tracker); StateTracker& state_tracker,
StagingBufferPool& staging_buffer_pool);
~TextureCacheRuntime(); ~TextureCacheRuntime();
void Finish(); void Finish();
ImageBufferMap UploadStagingBuffer(size_t size); StagingBufferMap UploadStagingBuffer(size_t size);
ImageBufferMap DownloadStagingBuffer(size_t size); StagingBufferMap DownloadStagingBuffer(size_t size);
u64 GetDeviceLocalMemory() const { u64 GetDeviceLocalMemory() const {
return device_access_memory; return device_access_memory;
@ -120,7 +113,7 @@ public:
const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter, const Region2D& src_region, Tegra::Engines::Fermi2D::Filter filter,
Tegra::Engines::Fermi2D::Operation operation); Tegra::Engines::Fermi2D::Operation operation);
void AccelerateImageUpload(Image& image, const ImageBufferMap& map, void AccelerateImageUpload(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles); std::span<const VideoCommon::SwizzleParameters> swizzles);
void InsertUploadMemoryBarrier(); void InsertUploadMemoryBarrier();
@ -149,35 +142,16 @@ public:
} }
private: private:
struct StagingBuffers {
explicit StagingBuffers(GLenum storage_flags_, GLenum map_flags_);
~StagingBuffers();
ImageBufferMap RequestMap(size_t requested_size, bool insert_fence);
size_t RequestBuffer(size_t requested_size);
std::optional<size_t> FindBuffer(size_t requested_size);
std::vector<OGLSync> syncs;
std::vector<OGLBuffer> buffers;
std::vector<u8*> maps;
std::vector<size_t> sizes;
GLenum storage_flags;
GLenum map_flags;
};
const Device& device; const Device& device;
StateTracker& state_tracker; StateTracker& state_tracker;
StagingBufferPool& staging_buffer_pool;
UtilShaders util_shaders; UtilShaders util_shaders;
FormatConversionPass format_conversion_pass; FormatConversionPass format_conversion_pass;
std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties; std::array<std::unordered_map<GLenum, FormatProperties>, 3> format_properties;
bool has_broken_texture_view_formats = false; bool has_broken_texture_view_formats = false;
StagingBuffers upload_buffers{GL_MAP_WRITE_BIT, GL_MAP_WRITE_BIT | GL_MAP_FLUSH_EXPLICIT_BIT};
StagingBuffers download_buffers{GL_MAP_READ_BIT | GL_CLIENT_STORAGE_BIT, GL_MAP_READ_BIT};
OGLTexture null_image_1d_array; OGLTexture null_image_1d_array;
OGLTexture null_image_cube_array; OGLTexture null_image_cube_array;
OGLTexture null_image_3d; OGLTexture null_image_3d;
@ -213,7 +187,7 @@ public:
void UploadMemory(GLuint buffer_handle, size_t buffer_offset, void UploadMemory(GLuint buffer_handle, size_t buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies); std::span<const VideoCommon::BufferImageCopy> copies);
void UploadMemory(const ImageBufferMap& map, void UploadMemory(const StagingBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies); std::span<const VideoCommon::BufferImageCopy> copies);
void DownloadMemory(GLuint buffer_handle, size_t buffer_offset, void DownloadMemory(GLuint buffer_handle, size_t buffer_offset,
@ -222,7 +196,8 @@ public:
void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset, void DownloadMemory(std::span<GLuint> buffer_handle, std::span<size_t> buffer_offset,
std::span<const VideoCommon::BufferImageCopy> copies); std::span<const VideoCommon::BufferImageCopy> copies);
void DownloadMemory(ImageBufferMap& map, std::span<const VideoCommon::BufferImageCopy> copies); void DownloadMemory(StagingBufferMap& map,
std::span<const VideoCommon::BufferImageCopy> copies);
GLuint StorageHandle() noexcept; GLuint StorageHandle() noexcept;

View file

@ -19,6 +19,7 @@
#include "video_core/host_shaders/pitch_unswizzle_comp.h" #include "video_core/host_shaders/pitch_unswizzle_comp.h"
#include "video_core/renderer_opengl/gl_shader_manager.h" #include "video_core/renderer_opengl/gl_shader_manager.h"
#include "video_core/renderer_opengl/gl_shader_util.h" #include "video_core/renderer_opengl/gl_shader_util.h"
#include "video_core/renderer_opengl/gl_staging_buffer_pool.h"
#include "video_core/renderer_opengl/gl_texture_cache.h" #include "video_core/renderer_opengl/gl_texture_cache.h"
#include "video_core/renderer_opengl/util_shaders.h" #include "video_core/renderer_opengl/util_shaders.h"
#include "video_core/texture_cache/accelerated_swizzle.h" #include "video_core/texture_cache/accelerated_swizzle.h"
@ -63,7 +64,7 @@ UtilShaders::UtilShaders(ProgramManager& program_manager_)
UtilShaders::~UtilShaders() = default; UtilShaders::~UtilShaders() = default;
void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map, void UtilShaders::ASTCDecode(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles) { std::span<const VideoCommon::SwizzleParameters> swizzles) {
static constexpr GLuint BINDING_INPUT_BUFFER = 0; static constexpr GLuint BINDING_INPUT_BUFFER = 0;
static constexpr GLuint BINDING_OUTPUT_IMAGE = 0; static constexpr GLuint BINDING_OUTPUT_IMAGE = 0;
@ -111,7 +112,7 @@ void UtilShaders::ASTCDecode(Image& image, const ImageBufferMap& map,
program_manager.RestoreGuestCompute(); program_manager.RestoreGuestCompute();
} }
void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map, void UtilShaders::BlockLinearUpload2D(Image& image, const StagingBufferMap& map,
std::span<const SwizzleParameters> swizzles) { std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0; static constexpr GLuint BINDING_SWIZZLE_BUFFER = 0;
@ -148,7 +149,7 @@ void UtilShaders::BlockLinearUpload2D(Image& image, const ImageBufferMap& map,
program_manager.RestoreGuestCompute(); program_manager.RestoreGuestCompute();
} }
void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map, void UtilShaders::BlockLinearUpload3D(Image& image, const StagingBufferMap& map,
std::span<const SwizzleParameters> swizzles) { std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8}; static constexpr Extent3D WORKGROUP_SIZE{16, 8, 8};
@ -189,7 +190,7 @@ void UtilShaders::BlockLinearUpload3D(Image& image, const ImageBufferMap& map,
program_manager.RestoreGuestCompute(); program_manager.RestoreGuestCompute();
} }
void UtilShaders::PitchUpload(Image& image, const ImageBufferMap& map, void UtilShaders::PitchUpload(Image& image, const StagingBufferMap& map,
std::span<const SwizzleParameters> swizzles) { std::span<const SwizzleParameters> swizzles) {
static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1}; static constexpr Extent3D WORKGROUP_SIZE{32, 32, 1};
static constexpr GLuint BINDING_INPUT_BUFFER = 0; static constexpr GLuint BINDING_INPUT_BUFFER = 0;

View file

@ -16,23 +16,23 @@ namespace OpenGL {
class Image; class Image;
class ProgramManager; class ProgramManager;
struct ImageBufferMap; struct StagingBufferMap;
class UtilShaders { class UtilShaders {
public: public:
explicit UtilShaders(ProgramManager& program_manager); explicit UtilShaders(ProgramManager& program_manager);
~UtilShaders(); ~UtilShaders();
void ASTCDecode(Image& image, const ImageBufferMap& map, void ASTCDecode(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles); std::span<const VideoCommon::SwizzleParameters> swizzles);
void BlockLinearUpload2D(Image& image, const ImageBufferMap& map, void BlockLinearUpload2D(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles); std::span<const VideoCommon::SwizzleParameters> swizzles);
void BlockLinearUpload3D(Image& image, const ImageBufferMap& map, void BlockLinearUpload3D(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles); std::span<const VideoCommon::SwizzleParameters> swizzles);
void PitchUpload(Image& image, const ImageBufferMap& map, void PitchUpload(Image& image, const StagingBufferMap& map,
std::span<const VideoCommon::SwizzleParameters> swizzles); std::span<const VideoCommon::SwizzleParameters> swizzles);
void CopyBC4(Image& dst_image, Image& src_image, void CopyBC4(Image& dst_image, Image& src_image,

View file

@ -157,6 +157,7 @@ struct BufferCacheParams {
static constexpr bool USE_MEMORY_MAPS = true; static constexpr bool USE_MEMORY_MAPS = true;
static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false; static constexpr bool SEPARATE_IMAGE_BUFFER_BINDINGS = false;
static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true; static constexpr bool IMPLEMENTS_ASYNC_DOWNLOADS = true;
static constexpr bool USE_MEMORY_MAPS_FOR_UPLOADS = true;
}; };
using BufferCache = VideoCommon::BufferCache<BufferCacheParams>; using BufferCache = VideoCommon::BufferCache<BufferCacheParams>;