Merge pull request #3600 from ReinUsesLisp/no-pointer-buf-cache

buffer_cache: Return handles instead of pointer to handles
This commit is contained in:
Fernando Sahmkow 2020-04-16 19:58:13 -04:00 committed by GitHub
commit c81f256111
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
14 changed files with 90 additions and 228 deletions

View file

@ -29,10 +29,10 @@ namespace VideoCommon {
using MapInterval = std::shared_ptr<MapIntervalBase>; using MapInterval = std::shared_ptr<MapIntervalBase>;
template <typename TBuffer, typename TBufferType, typename StreamBuffer> template <typename OwnerBuffer, typename BufferType, typename StreamBuffer>
class BufferCache { class BufferCache {
public: public:
using BufferInfo = std::pair<const TBufferType*, u64>; using BufferInfo = std::pair<BufferType, u64>;
BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4, BufferInfo UploadMemory(GPUVAddr gpu_addr, std::size_t size, std::size_t alignment = 4,
bool is_written = false, bool use_fast_cbuf = false) { bool is_written = false, bool use_fast_cbuf = false) {
@ -89,9 +89,7 @@ public:
} }
} }
const u64 offset = static_cast<u64>(block->GetOffset(cpu_addr)); return {ToHandle(block), static_cast<u64>(block->GetOffset(cpu_addr))};
return {ToHandle(block), offset};
} }
/// Uploads from host memory. Returns the handle of the buffer where the data is located and its offset. /// Uploads from host memory. Returns the handle of the buffer where the data is located and its offset.
@ -156,7 +154,7 @@ public:
} }
} }
virtual const TBufferType* GetEmptyBuffer(std::size_t size) = 0; virtual BufferType GetEmptyBuffer(std::size_t size) = 0;
protected: protected:
explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system, explicit BufferCache(VideoCore::RasterizerInterface& rasterizer, Core::System& system,
@ -166,19 +164,19 @@ protected:
~BufferCache() = default; ~BufferCache() = default;
virtual const TBufferType* ToHandle(const TBuffer& storage) = 0; virtual BufferType ToHandle(const OwnerBuffer& storage) = 0;
virtual void WriteBarrier() = 0; virtual void WriteBarrier() = 0;
virtual TBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0; virtual OwnerBuffer CreateBlock(VAddr cpu_addr, std::size_t size) = 0;
virtual void UploadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, virtual void UploadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
const u8* data) = 0; const u8* data) = 0;
virtual void DownloadBlockData(const TBuffer& buffer, std::size_t offset, std::size_t size, virtual void DownloadBlockData(const OwnerBuffer& buffer, std::size_t offset, std::size_t size,
u8* data) = 0; u8* data) = 0;
virtual void CopyBlock(const TBuffer& src, const TBuffer& dst, std::size_t src_offset, virtual void CopyBlock(const OwnerBuffer& src, const OwnerBuffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) = 0; std::size_t dst_offset, std::size_t size) = 0;
virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) { virtual BufferInfo ConstBufferUpload(const void* raw_pointer, std::size_t size) {
@ -221,9 +219,8 @@ private:
return std::make_shared<MapIntervalBase>(start, end, gpu_addr); return std::make_shared<MapIntervalBase>(start, end, gpu_addr);
} }
MapInterval MapAddress(const TBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr, MapInterval MapAddress(const OwnerBuffer& block, const GPUVAddr gpu_addr, const VAddr cpu_addr,
const std::size_t size) { const std::size_t size) {
std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size); std::vector<MapInterval> overlaps = GetMapsInRange(cpu_addr, size);
if (overlaps.empty()) { if (overlaps.empty()) {
auto& memory_manager = system.GPU().MemoryManager(); auto& memory_manager = system.GPU().MemoryManager();
@ -272,7 +269,7 @@ private:
return new_map; return new_map;
} }
void UpdateBlock(const TBuffer& block, VAddr start, VAddr end, void UpdateBlock(const OwnerBuffer& block, VAddr start, VAddr end,
std::vector<MapInterval>& overlaps) { std::vector<MapInterval>& overlaps) {
const IntervalType base_interval{start, end}; const IntervalType base_interval{start, end};
IntervalSet interval_set{}; IntervalSet interval_set{};
@ -313,7 +310,7 @@ private:
void FlushMap(MapInterval map) { void FlushMap(MapInterval map) {
std::size_t size = map->GetEnd() - map->GetStart(); std::size_t size = map->GetEnd() - map->GetStart();
TBuffer block = blocks[map->GetStart() >> block_page_bits]; OwnerBuffer block = blocks[map->GetStart() >> block_page_bits];
staging_buffer.resize(size); staging_buffer.resize(size);
DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data()); DownloadBlockData(block, block->GetOffset(map->GetStart()), size, staging_buffer.data());
system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size); system.Memory().WriteBlockUnsafe(map->GetStart(), staging_buffer.data(), size);
@ -328,7 +325,7 @@ private:
buffer_ptr += size; buffer_ptr += size;
buffer_offset += size; buffer_offset += size;
return {&stream_buffer_handle, uploaded_offset}; return {stream_buffer_handle, uploaded_offset};
} }
void AlignBuffer(std::size_t alignment) { void AlignBuffer(std::size_t alignment) {
@ -338,11 +335,11 @@ private:
buffer_offset = offset_aligned; buffer_offset = offset_aligned;
} }
TBuffer EnlargeBlock(TBuffer buffer) { OwnerBuffer EnlargeBlock(OwnerBuffer buffer) {
const std::size_t old_size = buffer->GetSize(); const std::size_t old_size = buffer->GetSize();
const std::size_t new_size = old_size + block_page_size; const std::size_t new_size = old_size + block_page_size;
const VAddr cpu_addr = buffer->GetCpuAddr(); const VAddr cpu_addr = buffer->GetCpuAddr();
TBuffer new_buffer = CreateBlock(cpu_addr, new_size); OwnerBuffer new_buffer = CreateBlock(cpu_addr, new_size);
CopyBlock(buffer, new_buffer, 0, 0, old_size); CopyBlock(buffer, new_buffer, 0, 0, old_size);
buffer->SetEpoch(epoch); buffer->SetEpoch(epoch);
pending_destruction.push_back(buffer); pending_destruction.push_back(buffer);
@ -356,14 +353,14 @@ private:
return new_buffer; return new_buffer;
} }
TBuffer MergeBlocks(TBuffer first, TBuffer second) { OwnerBuffer MergeBlocks(OwnerBuffer first, OwnerBuffer second) {
const std::size_t size_1 = first->GetSize(); const std::size_t size_1 = first->GetSize();
const std::size_t size_2 = second->GetSize(); const std::size_t size_2 = second->GetSize();
const VAddr first_addr = first->GetCpuAddr(); const VAddr first_addr = first->GetCpuAddr();
const VAddr second_addr = second->GetCpuAddr(); const VAddr second_addr = second->GetCpuAddr();
const VAddr new_addr = std::min(first_addr, second_addr); const VAddr new_addr = std::min(first_addr, second_addr);
const std::size_t new_size = size_1 + size_2; const std::size_t new_size = size_1 + size_2;
TBuffer new_buffer = CreateBlock(new_addr, new_size); OwnerBuffer new_buffer = CreateBlock(new_addr, new_size);
CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1); CopyBlock(first, new_buffer, 0, new_buffer->GetOffset(first_addr), size_1);
CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2); CopyBlock(second, new_buffer, 0, new_buffer->GetOffset(second_addr), size_2);
first->SetEpoch(epoch); first->SetEpoch(epoch);
@ -380,8 +377,8 @@ private:
return new_buffer; return new_buffer;
} }
TBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) { OwnerBuffer GetBlock(const VAddr cpu_addr, const std::size_t size) {
TBuffer found{}; OwnerBuffer found;
const VAddr cpu_addr_end = cpu_addr + size - 1; const VAddr cpu_addr_end = cpu_addr + size - 1;
u64 page_start = cpu_addr >> block_page_bits; u64 page_start = cpu_addr >> block_page_bits;
const u64 page_end = cpu_addr_end >> block_page_bits; const u64 page_end = cpu_addr_end >> block_page_bits;
@ -457,7 +454,7 @@ private:
Core::System& system; Core::System& system;
std::unique_ptr<StreamBuffer> stream_buffer; std::unique_ptr<StreamBuffer> stream_buffer;
TBufferType stream_buffer_handle{}; BufferType stream_buffer_handle{};
bool invalidated = false; bool invalidated = false;
@ -475,9 +472,9 @@ private:
static constexpr u64 block_page_bits = 21; static constexpr u64 block_page_bits = 21;
static constexpr u64 block_page_size = 1ULL << block_page_bits; static constexpr u64 block_page_size = 1ULL << block_page_bits;
std::unordered_map<u64, TBuffer> blocks; std::unordered_map<u64, OwnerBuffer> blocks;
std::list<TBuffer> pending_destruction; std::list<OwnerBuffer> pending_destruction;
u64 epoch = 0; u64 epoch = 0;
u64 modified_ticks = 0; u64 modified_ticks = 0;

View file

@ -55,33 +55,31 @@ void OGLBufferCache::WriteBarrier() {
glMemoryBarrier(GL_ALL_BARRIER_BITS); glMemoryBarrier(GL_ALL_BARRIER_BITS);
} }
const GLuint* OGLBufferCache::ToHandle(const Buffer& buffer) { GLuint OGLBufferCache::ToHandle(const Buffer& buffer) {
return buffer->GetHandle(); return buffer->GetHandle();
} }
const GLuint* OGLBufferCache::GetEmptyBuffer(std::size_t) { GLuint OGLBufferCache::GetEmptyBuffer(std::size_t) {
static const GLuint null_buffer = 0; return 0;
return &null_buffer;
} }
void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, void OGLBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
const u8* data) { const u8* data) {
glNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset), glNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(size), data); static_cast<GLsizeiptr>(size), data);
} }
void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, void OGLBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
u8* data) { u8* data) {
MICROPROFILE_SCOPE(OpenGL_Buffer_Download); MICROPROFILE_SCOPE(OpenGL_Buffer_Download);
glGetNamedBufferSubData(*buffer->GetHandle(), static_cast<GLintptr>(offset), glGetNamedBufferSubData(buffer->GetHandle(), static_cast<GLintptr>(offset),
static_cast<GLsizeiptr>(size), data); static_cast<GLsizeiptr>(size), data);
} }
void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, void OGLBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) { std::size_t dst_offset, std::size_t size) {
glCopyNamedBufferSubData(*src->GetHandle(), *dst->GetHandle(), glCopyNamedBufferSubData(src->GetHandle(), dst->GetHandle(), static_cast<GLintptr>(src_offset),
static_cast<GLintptr>(src_offset), static_cast<GLintptr>(dst_offset), static_cast<GLintptr>(dst_offset), static_cast<GLsizeiptr>(size));
static_cast<GLsizeiptr>(size));
} }
OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer, OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_pointer,
@ -89,7 +87,7 @@ OGLBufferCache::BufferInfo OGLBufferCache::ConstBufferUpload(const void* raw_poi
DEBUG_ASSERT(cbuf_cursor < std::size(cbufs)); DEBUG_ASSERT(cbuf_cursor < std::size(cbufs));
const GLuint& cbuf = cbufs[cbuf_cursor++]; const GLuint& cbuf = cbufs[cbuf_cursor++];
glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer); glNamedBufferSubData(cbuf, 0, static_cast<GLsizeiptr>(size), raw_pointer);
return {&cbuf, 0}; return {cbuf, 0};
} }
} // namespace OpenGL } // namespace OpenGL

View file

@ -34,12 +34,12 @@ public:
explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size); explicit CachedBufferBlock(VAddr cpu_addr, const std::size_t size);
~CachedBufferBlock(); ~CachedBufferBlock();
const GLuint* GetHandle() const { GLuint GetHandle() const {
return &gl_buffer.handle; return gl_buffer.handle;
} }
private: private:
OGLBuffer gl_buffer{}; OGLBuffer gl_buffer;
}; };
class OGLBufferCache final : public GenericBufferCache { class OGLBufferCache final : public GenericBufferCache {
@ -48,7 +48,7 @@ public:
const Device& device, std::size_t stream_size); const Device& device, std::size_t stream_size);
~OGLBufferCache(); ~OGLBufferCache();
const GLuint* GetEmptyBuffer(std::size_t) override; GLuint GetEmptyBuffer(std::size_t) override;
void Acquire() noexcept { void Acquire() noexcept {
cbuf_cursor = 0; cbuf_cursor = 0;
@ -57,9 +57,9 @@ public:
protected: protected:
Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
void WriteBarrier() override; GLuint ToHandle(const Buffer& buffer) override;
const GLuint* ToHandle(const Buffer& buffer) override; void WriteBarrier() override;
void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
const u8* data) override; const u8* data) override;

View file

@ -188,10 +188,8 @@ void RasterizerOpenGL::SetupVertexBuffer() {
ASSERT(end > start); ASSERT(end > start);
const u64 size = end - start + 1; const u64 size = end - start + 1;
const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size); const auto [vertex_buffer, vertex_buffer_offset] = buffer_cache.UploadMemory(start, size);
glBindVertexBuffer(static_cast<GLuint>(index), vertex_buffer, vertex_buffer_offset,
// Bind the vertex array to the buffer at the current offset. vertex_array.stride);
vertex_array_pushbuffer.SetVertexBuffer(static_cast<GLuint>(index), vertex_buffer,
vertex_buffer_offset, vertex_array.stride);
} }
} }
@ -222,7 +220,7 @@ GLintptr RasterizerOpenGL::SetupIndexBuffer() {
const auto& regs = system.GPU().Maxwell3D().regs; const auto& regs = system.GPU().Maxwell3D().regs;
const std::size_t size = CalculateIndexBufferSize(); const std::size_t size = CalculateIndexBufferSize();
const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size); const auto [buffer, offset] = buffer_cache.UploadMemory(regs.index_array.IndexStart(), size);
vertex_array_pushbuffer.SetIndexBuffer(buffer); glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, buffer);
return offset; return offset;
} }
@ -524,7 +522,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
// Prepare vertex array format. // Prepare vertex array format.
SetupVertexFormat(); SetupVertexFormat();
vertex_array_pushbuffer.Setup();
// Upload vertex and index data. // Upload vertex and index data.
SetupVertexBuffer(); SetupVertexBuffer();
@ -534,16 +531,12 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
index_buffer_offset = SetupIndexBuffer(); index_buffer_offset = SetupIndexBuffer();
} }
// Prepare packed bindings.
bind_ubo_pushbuffer.Setup();
bind_ssbo_pushbuffer.Setup();
// Setup emulation uniform buffer. // Setup emulation uniform buffer.
GLShader::MaxwellUniformData ubo; GLShader::MaxwellUniformData ubo;
ubo.SetFromRegs(gpu); ubo.SetFromRegs(gpu);
const auto [buffer, offset] = const auto [buffer, offset] =
buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment()); buffer_cache.UploadHostMemory(&ubo, sizeof(ubo), device.GetUniformBufferAlignment());
bind_ubo_pushbuffer.Push(EmulationUniformBlockBinding, buffer, offset, glBindBufferRange(GL_UNIFORM_BUFFER, EmulationUniformBlockBinding, buffer, offset,
static_cast<GLsizeiptr>(sizeof(ubo))); static_cast<GLsizeiptr>(sizeof(ubo)));
// Setup shaders and their used resources. // Setup shaders and their used resources.
@ -557,11 +550,6 @@ void RasterizerOpenGL::Draw(bool is_indexed, bool is_instanced) {
// Signal the buffer cache that we are not going to upload more things. // Signal the buffer cache that we are not going to upload more things.
buffer_cache.Unmap(); buffer_cache.Unmap();
// Now that we are no longer uploading data, we can safely bind the buffers to OpenGL.
vertex_array_pushbuffer.Bind();
bind_ubo_pushbuffer.Bind();
bind_ssbo_pushbuffer.Bind();
program_manager.BindGraphicsPipeline(); program_manager.BindGraphicsPipeline();
if (texture_cache.TextureBarrier()) { if (texture_cache.TextureBarrier()) {
@ -630,17 +618,11 @@ void RasterizerOpenGL::DispatchCompute(GPUVAddr code_addr) {
(Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment()); (Maxwell::MaxConstBufferSize + device.GetUniformBufferAlignment());
buffer_cache.Map(buffer_size); buffer_cache.Map(buffer_size);
bind_ubo_pushbuffer.Setup();
bind_ssbo_pushbuffer.Setup();
SetupComputeConstBuffers(kernel); SetupComputeConstBuffers(kernel);
SetupComputeGlobalMemory(kernel); SetupComputeGlobalMemory(kernel);
buffer_cache.Unmap(); buffer_cache.Unmap();
bind_ubo_pushbuffer.Bind();
bind_ssbo_pushbuffer.Bind();
const auto& launch_desc = system.GPU().KeplerCompute().launch_description; const auto& launch_desc = system.GPU().KeplerCompute().launch_description;
glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z); glDispatchCompute(launch_desc.grid_dim_x, launch_desc.grid_dim_y, launch_desc.grid_dim_z);
++num_queued_commands; ++num_queued_commands;
@ -771,7 +753,7 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const
const ConstBufferEntry& entry) { const ConstBufferEntry& entry) {
if (!buffer.enabled) { if (!buffer.enabled) {
// Set values to zero to unbind buffers // Set values to zero to unbind buffers
bind_ubo_pushbuffer.Push(binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0, glBindBufferRange(GL_UNIFORM_BUFFER, binding, buffer_cache.GetEmptyBuffer(sizeof(float)), 0,
sizeof(float)); sizeof(float));
return; return;
} }
@ -783,7 +765,7 @@ void RasterizerOpenGL::SetupConstBuffer(u32 binding, const Tegra::Engines::Const
const auto alignment = device.GetUniformBufferAlignment(); const auto alignment = device.GetUniformBufferAlignment();
const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false, const auto [cbuf, offset] = buffer_cache.UploadMemory(buffer.address, size, alignment, false,
device.HasFastBufferSubData()); device.HasFastBufferSubData());
bind_ubo_pushbuffer.Push(binding, cbuf, offset, size); glBindBufferRange(GL_UNIFORM_BUFFER, binding, cbuf, offset, size);
} }
void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) { void RasterizerOpenGL::SetupDrawGlobalMemory(std::size_t stage_index, const Shader& shader) {
@ -819,7 +801,8 @@ void RasterizerOpenGL::SetupGlobalMemory(u32 binding, const GlobalMemoryEntry& e
const auto alignment{device.GetShaderStorageBufferAlignment()}; const auto alignment{device.GetShaderStorageBufferAlignment()};
const auto [ssbo, buffer_offset] = const auto [ssbo, buffer_offset] =
buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten()); buffer_cache.UploadMemory(gpu_addr, size, alignment, entry.IsWritten());
bind_ssbo_pushbuffer.Push(binding, ssbo, buffer_offset, static_cast<GLsizeiptr>(size)); glBindBufferRange(GL_SHADER_STORAGE_BUFFER, binding, ssbo, buffer_offset,
static_cast<GLsizeiptr>(size));
} }
void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) { void RasterizerOpenGL::SetupDrawTextures(std::size_t stage_index, const Shader& shader) {
@ -1432,7 +1415,7 @@ void RasterizerOpenGL::EndTransformFeedback() {
const GPUVAddr gpu_addr = binding.Address(); const GPUVAddr gpu_addr = binding.Address();
const std::size_t size = binding.buffer_size; const std::size_t size = binding.buffer_size;
const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); const auto [dest_buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
glCopyNamedBufferSubData(handle, *dest_buffer, 0, offset, static_cast<GLsizeiptr>(size)); glCopyNamedBufferSubData(handle, dest_buffer, 0, offset, static_cast<GLsizeiptr>(size));
} }
} }

View file

@ -231,9 +231,7 @@ private:
static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024; static constexpr std::size_t STREAM_BUFFER_SIZE = 128 * 1024 * 1024;
OGLBufferCache buffer_cache; OGLBufferCache buffer_cache;
VertexArrayPushBuffer vertex_array_pushbuffer{state_tracker}; GLint vertex_binding = 0;
BindBuffersRangePushBuffer bind_ubo_pushbuffer{GL_UNIFORM_BUFFER};
BindBuffersRangePushBuffer bind_ssbo_pushbuffer{GL_SHADER_STORAGE_BUFFER};
std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers> std::array<OGLBuffer, Tegra::Engines::Maxwell3D::Regs::NumTransformFeedbackBuffers>
transform_feedback_buffers; transform_feedback_buffers;

View file

@ -14,68 +14,6 @@
namespace OpenGL { namespace OpenGL {
struct VertexArrayPushBuffer::Entry {
GLuint binding_index{};
const GLuint* buffer{};
GLintptr offset{};
GLsizei stride{};
};
VertexArrayPushBuffer::VertexArrayPushBuffer(StateTracker& state_tracker)
: state_tracker{state_tracker} {}
VertexArrayPushBuffer::~VertexArrayPushBuffer() = default;
void VertexArrayPushBuffer::Setup() {
index_buffer = nullptr;
vertex_buffers.clear();
}
void VertexArrayPushBuffer::SetIndexBuffer(const GLuint* buffer) {
index_buffer = buffer;
}
void VertexArrayPushBuffer::SetVertexBuffer(GLuint binding_index, const GLuint* buffer,
GLintptr offset, GLsizei stride) {
vertex_buffers.push_back(Entry{binding_index, buffer, offset, stride});
}
void VertexArrayPushBuffer::Bind() {
if (index_buffer) {
state_tracker.BindIndexBuffer(*index_buffer);
}
for (const auto& entry : vertex_buffers) {
glBindVertexBuffer(entry.binding_index, *entry.buffer, entry.offset, entry.stride);
}
}
struct BindBuffersRangePushBuffer::Entry {
GLuint binding;
const GLuint* buffer;
GLintptr offset;
GLsizeiptr size;
};
BindBuffersRangePushBuffer::BindBuffersRangePushBuffer(GLenum target) : target{target} {}
BindBuffersRangePushBuffer::~BindBuffersRangePushBuffer() = default;
void BindBuffersRangePushBuffer::Setup() {
entries.clear();
}
void BindBuffersRangePushBuffer::Push(GLuint binding, const GLuint* buffer, GLintptr offset,
GLsizeiptr size) {
entries.push_back(Entry{binding, buffer, offset, size});
}
void BindBuffersRangePushBuffer::Bind() {
for (const Entry& entry : entries) {
glBindBufferRange(target, entry.binding, *entry.buffer, entry.offset, entry.size);
}
}
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) { void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info) {
if (!GLAD_GL_KHR_debug) { if (!GLAD_GL_KHR_debug) {
// We don't need to throw an error as this is just for debugging // We don't need to throw an error as this is just for debugging

View file

@ -11,49 +11,6 @@
namespace OpenGL { namespace OpenGL {
class StateTracker;
class VertexArrayPushBuffer final {
public:
explicit VertexArrayPushBuffer(StateTracker& state_tracker);
~VertexArrayPushBuffer();
void Setup();
void SetIndexBuffer(const GLuint* buffer);
void SetVertexBuffer(GLuint binding_index, const GLuint* buffer, GLintptr offset,
GLsizei stride);
void Bind();
private:
struct Entry;
StateTracker& state_tracker;
const GLuint* index_buffer{};
std::vector<Entry> vertex_buffers;
};
class BindBuffersRangePushBuffer final {
public:
explicit BindBuffersRangePushBuffer(GLenum target);
~BindBuffersRangePushBuffer();
void Setup();
void Push(GLuint binding, const GLuint* buffer, GLintptr offset, GLsizeiptr size);
void Bind();
private:
struct Entry;
GLenum target;
std::vector<Entry> entries;
};
void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {}); void LabelGLObject(GLenum identifier, GLuint handle, VAddr addr, std::string_view extra_info = {});
} // namespace OpenGL } // namespace OpenGL

View file

@ -74,18 +74,18 @@ Buffer VKBufferCache::CreateBlock(VAddr cpu_addr, std::size_t size) {
return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size); return std::make_shared<CachedBufferBlock>(device, memory_manager, cpu_addr, size);
} }
const VkBuffer* VKBufferCache::ToHandle(const Buffer& buffer) { VkBuffer VKBufferCache::ToHandle(const Buffer& buffer) {
return buffer->GetHandle(); return buffer->GetHandle();
} }
const VkBuffer* VKBufferCache::GetEmptyBuffer(std::size_t size) { VkBuffer VKBufferCache::GetEmptyBuffer(std::size_t size) {
size = std::max(size, std::size_t(4)); size = std::max(size, std::size_t(4));
const auto& empty = staging_pool.GetUnusedBuffer(size, false); const auto& empty = staging_pool.GetUnusedBuffer(size, false);
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) { scheduler.Record([size, buffer = *empty.handle](vk::CommandBuffer cmdbuf) {
cmdbuf.FillBuffer(buffer, 0, size, 0); cmdbuf.FillBuffer(buffer, 0, size, 0);
}); });
return empty.handle.address(); return *empty.handle;
} }
void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
@ -94,7 +94,7 @@ void VKBufferCache::UploadBlockData(const Buffer& buffer, std::size_t offset, st
std::memcpy(staging.commit->Map(size), data, size); std::memcpy(staging.commit->Map(size), data, size);
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
size](vk::CommandBuffer cmdbuf) { size](vk::CommandBuffer cmdbuf) {
cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size}); cmdbuf.CopyBuffer(staging, buffer, VkBufferCopy{0, offset, size});
@ -117,7 +117,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
u8* data) { u8* data) {
const auto& staging = staging_pool.GetUnusedBuffer(size, true); const auto& staging = staging_pool.GetUnusedBuffer(size, true);
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([staging = *staging.handle, buffer = *buffer->GetHandle(), offset, scheduler.Record([staging = *staging.handle, buffer = buffer->GetHandle(), offset,
size](vk::CommandBuffer cmdbuf) { size](vk::CommandBuffer cmdbuf) {
VkBufferMemoryBarrier barrier; VkBufferMemoryBarrier barrier;
barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER; barrier.sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
@ -144,7 +144,7 @@ void VKBufferCache::DownloadBlockData(const Buffer& buffer, std::size_t offset,
void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset, void VKBufferCache::CopyBlock(const Buffer& src, const Buffer& dst, std::size_t src_offset,
std::size_t dst_offset, std::size_t size) { std::size_t dst_offset, std::size_t size) {
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.RequestOutsideRenderPassOperationContext();
scheduler.Record([src_buffer = *src->GetHandle(), dst_buffer = *dst->GetHandle(), src_offset, scheduler.Record([src_buffer = src->GetHandle(), dst_buffer = dst->GetHandle(), src_offset,
dst_offset, size](vk::CommandBuffer cmdbuf) { dst_offset, size](vk::CommandBuffer cmdbuf) {
cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size}); cmdbuf.CopyBuffer(src_buffer, dst_buffer, VkBufferCopy{src_offset, dst_offset, size});

View file

@ -33,8 +33,8 @@ public:
VAddr cpu_addr, std::size_t size); VAddr cpu_addr, std::size_t size);
~CachedBufferBlock(); ~CachedBufferBlock();
const VkBuffer* GetHandle() const { VkBuffer GetHandle() const {
return buffer.handle.address(); return *buffer.handle;
} }
private: private:
@ -50,15 +50,15 @@ public:
VKScheduler& scheduler, VKStagingBufferPool& staging_pool); VKScheduler& scheduler, VKStagingBufferPool& staging_pool);
~VKBufferCache(); ~VKBufferCache();
const VkBuffer* GetEmptyBuffer(std::size_t size) override; VkBuffer GetEmptyBuffer(std::size_t size) override;
protected: protected:
VkBuffer ToHandle(const Buffer& buffer) override;
void WriteBarrier() override {} void WriteBarrier() override {}
Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override; Buffer CreateBlock(VAddr cpu_addr, std::size_t size) override;
const VkBuffer* ToHandle(const Buffer& buffer) override;
void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size, void UploadBlockData(const Buffer& buffer, std::size_t offset, std::size_t size,
const u8* data) override; const u8* data) override;

View file

@ -343,13 +343,13 @@ QuadArrayPass::QuadArrayPass(const VKDevice& device, VKScheduler& scheduler,
QuadArrayPass::~QuadArrayPass() = default; QuadArrayPass::~QuadArrayPass() = default;
std::pair<const VkBuffer*, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) { std::pair<VkBuffer, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertices, u32 first) {
const u32 num_triangle_vertices = num_vertices * 6 / 4; const u32 num_triangle_vertices = num_vertices * 6 / 4;
const std::size_t staging_size = num_triangle_vertices * sizeof(u32); const std::size_t staging_size = num_triangle_vertices * sizeof(u32);
auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
update_descriptor_queue.Acquire(); update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(buffer.handle.address(), 0, staging_size); update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.RequestOutsideRenderPassOperationContext();
@ -377,7 +377,7 @@ std::pair<const VkBuffer*, VkDeviceSize> QuadArrayPass::Assemble(u32 num_vertice
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {}); VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, {barrier}, {});
}); });
return {buffer.handle.address(), 0}; return {*buffer.handle, 0};
} }
Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler, Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
@ -391,14 +391,14 @@ Uint8Pass::Uint8Pass(const VKDevice& device, VKScheduler& scheduler,
Uint8Pass::~Uint8Pass() = default; Uint8Pass::~Uint8Pass() = default;
std::pair<const VkBuffer*, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer, std::pair<VkBuffer, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer src_buffer,
u64 src_offset) { u64 src_offset) {
const auto staging_size = static_cast<u32>(num_vertices * sizeof(u16)); const auto staging_size = static_cast<u32>(num_vertices * sizeof(u16));
auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false); auto& buffer = staging_buffer_pool.GetUnusedBuffer(staging_size, false);
update_descriptor_queue.Acquire(); update_descriptor_queue.Acquire();
update_descriptor_queue.AddBuffer(&src_buffer, src_offset, num_vertices); update_descriptor_queue.AddBuffer(src_buffer, src_offset, num_vertices);
update_descriptor_queue.AddBuffer(buffer.handle.address(), 0, staging_size); update_descriptor_queue.AddBuffer(*buffer.handle, 0, staging_size);
const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence()); const auto set = CommitDescriptorSet(update_descriptor_queue, scheduler.GetFence());
scheduler.RequestOutsideRenderPassOperationContext(); scheduler.RequestOutsideRenderPassOperationContext();
@ -422,7 +422,7 @@ std::pair<const VkBuffer*, u64> Uint8Pass::Assemble(u32 num_vertices, VkBuffer s
cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, cmdbuf.PipelineBarrier(VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT,
VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {}); VK_PIPELINE_STAGE_VERTEX_INPUT_BIT, 0, {}, barrier, {});
}); });
return {buffer.handle.address(), 0}; return {*buffer.handle, 0};
} }
} // namespace Vulkan } // namespace Vulkan

View file

@ -50,7 +50,7 @@ public:
VKUpdateDescriptorQueue& update_descriptor_queue); VKUpdateDescriptorQueue& update_descriptor_queue);
~QuadArrayPass(); ~QuadArrayPass();
std::pair<const VkBuffer*, VkDeviceSize> Assemble(u32 num_vertices, u32 first); std::pair<VkBuffer, VkDeviceSize> Assemble(u32 num_vertices, u32 first);
private: private:
VKScheduler& scheduler; VKScheduler& scheduler;
@ -65,7 +65,7 @@ public:
VKUpdateDescriptorQueue& update_descriptor_queue); VKUpdateDescriptorQueue& update_descriptor_queue);
~Uint8Pass(); ~Uint8Pass();
std::pair<const VkBuffer*, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset); std::pair<VkBuffer, u64> Assemble(u32 num_vertices, VkBuffer src_buffer, u64 src_offset);
private: private:
VKScheduler& scheduler; VKScheduler& scheduler;

View file

@ -137,13 +137,13 @@ Tegra::Texture::FullTextureInfo GetTextureInfo(const Engine& engine, const Entry
class BufferBindings final { class BufferBindings final {
public: public:
void AddVertexBinding(const VkBuffer* buffer, VkDeviceSize offset) { void AddVertexBinding(VkBuffer buffer, VkDeviceSize offset) {
vertex.buffer_ptrs[vertex.num_buffers] = buffer; vertex.buffers[vertex.num_buffers] = buffer;
vertex.offsets[vertex.num_buffers] = offset; vertex.offsets[vertex.num_buffers] = offset;
++vertex.num_buffers; ++vertex.num_buffers;
} }
void SetIndexBinding(const VkBuffer* buffer, VkDeviceSize offset, VkIndexType type) { void SetIndexBinding(VkBuffer buffer, VkDeviceSize offset, VkIndexType type) {
index.buffer = buffer; index.buffer = buffer;
index.offset = offset; index.offset = offset;
index.type = type; index.type = type;
@ -227,19 +227,19 @@ private:
// Some of these fields are intentionally left uninitialized to avoid initializing them twice. // Some of these fields are intentionally left uninitialized to avoid initializing them twice.
struct { struct {
std::size_t num_buffers = 0; std::size_t num_buffers = 0;
std::array<const VkBuffer*, Maxwell::NumVertexArrays> buffer_ptrs; std::array<VkBuffer, Maxwell::NumVertexArrays> buffers;
std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets; std::array<VkDeviceSize, Maxwell::NumVertexArrays> offsets;
} vertex; } vertex;
struct { struct {
const VkBuffer* buffer = nullptr; VkBuffer buffer = nullptr;
VkDeviceSize offset; VkDeviceSize offset;
VkIndexType type; VkIndexType type;
} index; } index;
template <std::size_t N> template <std::size_t N>
void BindStatic(VKScheduler& scheduler) const { void BindStatic(VKScheduler& scheduler) const {
if (index.buffer != nullptr) { if (index.buffer) {
BindStatic<N, true>(scheduler); BindStatic<N, true>(scheduler);
} else { } else {
BindStatic<N, false>(scheduler); BindStatic<N, false>(scheduler);
@ -254,18 +254,14 @@ private:
} }
std::array<VkBuffer, N> buffers; std::array<VkBuffer, N> buffers;
std::transform(vertex.buffer_ptrs.begin(), vertex.buffer_ptrs.begin() + N, buffers.begin(),
[](const auto ptr) { return *ptr; });
std::array<VkDeviceSize, N> offsets; std::array<VkDeviceSize, N> offsets;
std::copy(vertex.buffers.begin(), vertex.buffers.begin() + N, buffers.begin());
std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin()); std::copy(vertex.offsets.begin(), vertex.offsets.begin() + N, offsets.begin());
if constexpr (is_indexed) { if constexpr (is_indexed) {
// Indexed draw // Indexed draw
scheduler.Record([buffers, offsets, index_buffer = *index.buffer, scheduler.Record([buffers, offsets, index = index](vk::CommandBuffer cmdbuf) {
index_offset = index.offset, cmdbuf.BindIndexBuffer(index.buffer, index.offset, index.type);
index_type = index.type](vk::CommandBuffer cmdbuf) {
cmdbuf.BindIndexBuffer(index_buffer, index_offset, index_type);
cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data()); cmdbuf.BindVertexBuffers(0, static_cast<u32>(N), buffers.data(), offsets.data());
}); });
} else { } else {
@ -790,7 +786,7 @@ void RasterizerVulkan::BeginTransformFeedback() {
const std::size_t size = binding.buffer_size; const std::size_t size = binding.buffer_size;
const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true); const auto [buffer, offset] = buffer_cache.UploadMemory(gpu_addr, size, 4, true);
scheduler.Record([buffer = *buffer, offset = offset, size](vk::CommandBuffer cmdbuf) { scheduler.Record([buffer = buffer, offset = offset, size](vk::CommandBuffer cmdbuf) {
cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size); cmdbuf.BindTransformFeedbackBuffersEXT(0, 1, &buffer, &offset, &size);
cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr); cmdbuf.BeginTransformFeedbackEXT(0, 0, nullptr, nullptr);
}); });
@ -870,7 +866,7 @@ void RasterizerVulkan::SetupIndexBuffer(BufferBindings& buffer_bindings, DrawPar
auto format = regs.index_array.format; auto format = regs.index_array.format;
const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte; const bool is_uint8 = format == Maxwell::IndexFormat::UnsignedByte;
if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) { if (is_uint8 && !device.IsExtIndexTypeUint8Supported()) {
std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, *buffer, offset); std::tie(buffer, offset) = uint8_pass.Assemble(params.num_vertices, buffer, offset);
format = Maxwell::IndexFormat::UnsignedShort; format = Maxwell::IndexFormat::UnsignedShort;
} }
@ -1007,8 +1003,8 @@ void RasterizerVulkan::SetupGlobalBuffer(const GlobalBufferEntry& entry, GPUVAdd
const auto size = memory_manager.Read<u32>(address + 8); const auto size = memory_manager.Read<u32>(address + 8);
if (size == 0) { if (size == 0) {
// Sometimes global memory pointers don't have a proper size. Upload a dummy entry because // Sometimes global memory pointers don't have a proper size. Upload a dummy entry
// Vulkan doesn't like empty buffers. // because Vulkan doesn't like empty buffers.
constexpr std::size_t dummy_size = 4; constexpr std::size_t dummy_size = 4;
const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size); const auto buffer = buffer_cache.GetEmptyBuffer(dummy_size);
update_descriptor_queue.AddBuffer(buffer, 0, dummy_size); update_descriptor_queue.AddBuffer(buffer, 0, dummy_size);

View file

@ -35,12 +35,13 @@ void VKUpdateDescriptorQueue::Send(VkDescriptorUpdateTemplateKHR update_template
payload.clear(); payload.clear();
} }
// TODO(Rodrigo): Rework to write the payload directly
const auto payload_start = payload.data() + payload.size(); const auto payload_start = payload.data() + payload.size();
for (const auto& entry : entries) { for (const auto& entry : entries) {
if (const auto image = std::get_if<VkDescriptorImageInfo>(&entry)) { if (const auto image = std::get_if<VkDescriptorImageInfo>(&entry)) {
payload.push_back(*image); payload.push_back(*image);
} else if (const auto buffer = std::get_if<Buffer>(&entry)) { } else if (const auto buffer = std::get_if<VkDescriptorBufferInfo>(&entry)) {
payload.emplace_back(*buffer->buffer, buffer->offset, buffer->size); payload.push_back(*buffer);
} else if (const auto texel = std::get_if<VkBufferView>(&entry)) { } else if (const auto texel = std::get_if<VkBufferView>(&entry)) {
payload.push_back(*texel); payload.push_back(*texel);
} else { } else {

View file

@ -18,12 +18,11 @@ class VKScheduler;
class DescriptorUpdateEntry { class DescriptorUpdateEntry {
public: public:
explicit DescriptorUpdateEntry() : image{} {} explicit DescriptorUpdateEntry() {}
DescriptorUpdateEntry(VkDescriptorImageInfo image) : image{image} {} DescriptorUpdateEntry(VkDescriptorImageInfo image) : image{image} {}
DescriptorUpdateEntry(VkBuffer buffer, VkDeviceSize offset, VkDeviceSize size) DescriptorUpdateEntry(VkDescriptorBufferInfo buffer) : buffer{buffer} {}
: buffer{buffer, offset, size} {}
DescriptorUpdateEntry(VkBufferView texel_buffer) : texel_buffer{texel_buffer} {} DescriptorUpdateEntry(VkBufferView texel_buffer) : texel_buffer{texel_buffer} {}
@ -54,8 +53,8 @@ public:
entries.emplace_back(VkDescriptorImageInfo{{}, image_view, {}}); entries.emplace_back(VkDescriptorImageInfo{{}, image_view, {}});
} }
void AddBuffer(const VkBuffer* buffer, u64 offset, std::size_t size) { void AddBuffer(VkBuffer buffer, u64 offset, std::size_t size) {
entries.push_back(Buffer{buffer, offset, size}); entries.emplace_back(VkDescriptorBufferInfo{buffer, offset, size});
} }
void AddTexelBuffer(VkBufferView texel_buffer) { void AddTexelBuffer(VkBufferView texel_buffer) {
@ -67,12 +66,7 @@ public:
} }
private: private:
struct Buffer { using Variant = std::variant<VkDescriptorImageInfo, VkDescriptorBufferInfo, VkBufferView>;
const VkBuffer* buffer = nullptr;
u64 offset = 0;
std::size_t size = 0;
};
using Variant = std::variant<VkDescriptorImageInfo, Buffer, VkBufferView>;
const VKDevice& device; const VKDevice& device;
VKScheduler& scheduler; VKScheduler& scheduler;