Add timestamp to 16-byte/4-word semaphore releases. (#3049)

* Add timestamp to 16-byte semaphore releases.

BOTW was reading a ulong 8 bytes after a semaphore return. Turns out this is the timestamp it was trying to do performance calculation with, so I've made it write when necessary.

This mode was also added to the DMA semaphore I added recently, as it is required by a few games. (i think quake?)

The timestamp code has been moved to GPU context. Check other games with an unusually low framerate cap or dynamic resolution to see if they have improved.

* Cast dma semaphore payload to ulong to fill the space

* Write timestamp first

Might be just worrying too much, but we don't want the applcation reading timestamp if it sees the payload before timestamp is written.
This commit is contained in:
riperiperi 2022-01-27 21:50:32 +00:00 committed by GitHub
parent fd6d3ec88f
commit c52158b733
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
4 changed files with 54 additions and 37 deletions

View file

@ -115,7 +115,8 @@ namespace Ryujinx.Graphics.Gpu.Engine.Dma
}
else /* if (type == LaunchDmaSemaphoreType.ReleaseFourWordSemaphore) */
{
Logger.Warning?.Print(LogClass.Gpu, "DMA semaphore type ReleaseFourWordSemaphore was used, but is not currently implemented.");
_channel.MemoryManager.Write(address + 8, _context.GetTimestamp());
_channel.MemoryManager.Write(address, (ulong)_state.State.SetSemaphorePayload);
}
}
}

View file

@ -75,6 +75,12 @@ namespace Ryujinx.Graphics.Gpu.Engine.GPFifo
SemaphoredOperation operation = _state.State.SemaphoredOperation;
if (_state.State.SemaphoredReleaseSize == SemaphoredReleaseSize.SixteenBytes)
{
_parent.MemoryManager.Write(address + 4, 0);
_parent.MemoryManager.Write(address + 8, _context.GetTimestamp());
}
// TODO: Acquire operations (Wait), interrupts for invalid combinations.
if (operation == SemaphoredOperation.Release)
{

View file

@ -1,6 +1,4 @@
using Ryujinx.Common;
using Ryujinx.Graphics.GAL;
using System.Runtime.InteropServices;
using Ryujinx.Graphics.GAL;
namespace Ryujinx.Graphics.Gpu.Engine.Threed
{
@ -9,9 +7,6 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
/// </summary>
class SemaphoreUpdater
{
private const int NsToTicksFractionNumerator = 384;
private const int NsToTicksFractionDenominator = 625;
/// <summary>
/// GPU semaphore operation.
/// </summary>
@ -154,14 +149,7 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
{
ulong gpuVa = _state.State.SemaphoreAddress.Pack();
ulong ticks = ConvertNanosecondsToTicks((ulong)PerformanceCounter.ElapsedNanoseconds);
if (GraphicsConfig.FastGpuTime)
{
// Divide by some amount to report time as if operations were performed faster than they really are.
// This can prevent some games from switching to a lower resolution because rendering is too slow.
ticks /= 256;
}
ulong ticks = _context.GetTimestamp();
ICounterEvent counter = null;
@ -197,27 +185,5 @@ namespace Ryujinx.Graphics.Gpu.Engine.Threed
_channel.MemoryManager.CounterCache.AddOrUpdate(gpuVa, counter);
}
/// <summary>
/// Converts a nanoseconds timestamp value to Maxwell time ticks.
/// </summary>
/// <remarks>
/// The frequency is 614400000 Hz.
/// </remarks>
/// <param name="nanoseconds">Timestamp in nanoseconds</param>
/// <returns>Maxwell ticks</returns>
private static ulong ConvertNanosecondsToTicks(ulong nanoseconds)
{
// We need to divide first to avoid overflows.
// We fix up the result later by calculating the difference and adding
// that to the result.
ulong divided = nanoseconds / NsToTicksFractionDenominator;
ulong rounded = divided * NsToTicksFractionDenominator;
ulong errorBias = (nanoseconds - rounded) * NsToTicksFractionNumerator / NsToTicksFractionDenominator;
return divided * NsToTicksFractionNumerator + errorBias;
}
}
}

View file

@ -1,3 +1,4 @@
using Ryujinx.Common;
using Ryujinx.Graphics.GAL;
using Ryujinx.Graphics.Gpu.Engine.GPFifo;
using Ryujinx.Graphics.Gpu.Memory;
@ -15,6 +16,9 @@ namespace Ryujinx.Graphics.Gpu
/// </summary>
public sealed class GpuContext : IDisposable
{
private const int NsToTicksFractionNumerator = 384;
private const int NsToTicksFractionDenominator = 625;
/// <summary>
/// Event signaled when the host emulation context is ready to be used by the gpu context.
/// </summary>
@ -180,6 +184,46 @@ namespace Ryujinx.Graphics.Gpu
}
}
/// <summary>
/// Converts a nanoseconds timestamp value to Maxwell time ticks.
/// </summary>
/// <remarks>
/// The frequency is 614400000 Hz.
/// </remarks>
/// <param name="nanoseconds">Timestamp in nanoseconds</param>
/// <returns>Maxwell ticks</returns>
private static ulong ConvertNanosecondsToTicks(ulong nanoseconds)
{
// We need to divide first to avoid overflows.
// We fix up the result later by calculating the difference and adding
// that to the result.
ulong divided = nanoseconds / NsToTicksFractionDenominator;
ulong rounded = divided * NsToTicksFractionDenominator;
ulong errorBias = (nanoseconds - rounded) * NsToTicksFractionNumerator / NsToTicksFractionDenominator;
return divided * NsToTicksFractionNumerator + errorBias;
}
/// <summary>
/// Gets the value of the GPU timer.
/// </summary>
/// <returns>The current GPU timestamp</returns>
public ulong GetTimestamp()
{
ulong ticks = ConvertNanosecondsToTicks((ulong)PerformanceCounter.ElapsedNanoseconds);
if (GraphicsConfig.FastGpuTime)
{
// Divide by some amount to report time as if operations were performed faster than they really are.
// This can prevent some games from switching to a lower resolution because rendering is too slow.
ticks /= 256;
}
return ticks;
}
/// <summary>
/// Shader cache state update handler.
/// </summary>