Implement fast path for AES crypto instructions on Arm64 (#5281)
* Implement fast path for AES crypto instructions on Arm64
* PPTC version bump
* Use AES HW feature check
This commit is contained in:
parent
eb0bb36bbf
commit
193ca3c9a2
9 changed files with 79 additions and 16 deletions
|
@ -168,8 +168,6 @@ namespace ARMeilleure.CodeGen.Arm64
|
||||||
|
|
||||||
Logger.StartPass(PassName.CodeGeneration);
|
Logger.StartPass(PassName.CodeGeneration);
|
||||||
|
|
||||||
//Console.Error.WriteLine(IRDumper.GetDump(cfg));
|
|
||||||
|
|
||||||
bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0;
|
bool relocatable = (cctx.Options & CompilerOptions.Relocatable) != 0;
|
||||||
|
|
||||||
CodeGenContext context = new(allocResult, maxCallArgs, cfg.Blocks.Count, relocatable);
|
CodeGenContext context = new(allocResult, maxCallArgs, cfg.Blocks.Count, relocatable);
|
||||||
|
|
|
@ -179,6 +179,35 @@ namespace ARMeilleure.CodeGen.Arm64
|
||||||
(uint)operation.GetSource(2).AsInt32());
|
(uint)operation.GetSource(2).AsInt32());
|
||||||
break;
|
break;
|
||||||
|
|
||||||
|
case IntrinsicType.Vector128Unary:
|
||||||
|
GenerateVectorUnary(
|
||||||
|
context,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
info.Inst,
|
||||||
|
operation.Destination,
|
||||||
|
operation.GetSource(0));
|
||||||
|
break;
|
||||||
|
case IntrinsicType.Vector128Binary:
|
||||||
|
GenerateVectorBinary(
|
||||||
|
context,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
info.Inst,
|
||||||
|
operation.Destination,
|
||||||
|
operation.GetSource(0),
|
||||||
|
operation.GetSource(1));
|
||||||
|
break;
|
||||||
|
case IntrinsicType.Vector128BinaryRd:
|
||||||
|
GenerateVectorUnary(
|
||||||
|
context,
|
||||||
|
1,
|
||||||
|
0,
|
||||||
|
info.Inst,
|
||||||
|
operation.Destination,
|
||||||
|
operation.GetSource(1));
|
||||||
|
break;
|
||||||
|
|
||||||
case IntrinsicType.VectorUnary:
|
case IntrinsicType.VectorUnary:
|
||||||
GenerateVectorUnary(
|
GenerateVectorUnary(
|
||||||
context,
|
context,
|
||||||
|
|
|
@ -19,8 +19,8 @@ namespace ARMeilleure.CodeGen.Arm64
|
||||||
Add(Intrinsic.Arm64AddvV, new IntrinsicInfo(0x0e31b800u, IntrinsicType.VectorUnary));
|
Add(Intrinsic.Arm64AddvV, new IntrinsicInfo(0x0e31b800u, IntrinsicType.VectorUnary));
|
||||||
Add(Intrinsic.Arm64AddS, new IntrinsicInfo(0x5e208400u, IntrinsicType.ScalarBinary));
|
Add(Intrinsic.Arm64AddS, new IntrinsicInfo(0x5e208400u, IntrinsicType.ScalarBinary));
|
||||||
Add(Intrinsic.Arm64AddV, new IntrinsicInfo(0x0e208400u, IntrinsicType.VectorBinary));
|
Add(Intrinsic.Arm64AddV, new IntrinsicInfo(0x0e208400u, IntrinsicType.VectorBinary));
|
||||||
Add(Intrinsic.Arm64AesdV, new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128Unary));
|
Add(Intrinsic.Arm64AesdV, new IntrinsicInfo(0x4e285800u, IntrinsicType.Vector128BinaryRd));
|
||||||
Add(Intrinsic.Arm64AeseV, new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128Unary));
|
Add(Intrinsic.Arm64AeseV, new IntrinsicInfo(0x4e284800u, IntrinsicType.Vector128BinaryRd));
|
||||||
Add(Intrinsic.Arm64AesimcV, new IntrinsicInfo(0x4e287800u, IntrinsicType.Vector128Unary));
|
Add(Intrinsic.Arm64AesimcV, new IntrinsicInfo(0x4e287800u, IntrinsicType.Vector128Unary));
|
||||||
Add(Intrinsic.Arm64AesmcV, new IntrinsicInfo(0x4e286800u, IntrinsicType.Vector128Unary));
|
Add(Intrinsic.Arm64AesmcV, new IntrinsicInfo(0x4e286800u, IntrinsicType.Vector128Unary));
|
||||||
Add(Intrinsic.Arm64AndV, new IntrinsicInfo(0x0e201c00u, IntrinsicType.VectorBinaryBitwise));
|
Add(Intrinsic.Arm64AndV, new IntrinsicInfo(0x0e201c00u, IntrinsicType.VectorBinaryBitwise));
|
||||||
|
|
|
@ -23,6 +23,10 @@ namespace ARMeilleure.CodeGen.Arm64
|
||||||
ScalarTernaryShlRd,
|
ScalarTernaryShlRd,
|
||||||
ScalarTernaryShrRd,
|
ScalarTernaryShrRd,
|
||||||
|
|
||||||
|
Vector128Unary,
|
||||||
|
Vector128Binary,
|
||||||
|
Vector128BinaryRd,
|
||||||
|
|
||||||
VectorUnary,
|
VectorUnary,
|
||||||
VectorUnaryBitwise,
|
VectorUnaryBitwise,
|
||||||
VectorUnaryByElem,
|
VectorUnaryByElem,
|
||||||
|
@ -50,9 +54,6 @@ namespace ARMeilleure.CodeGen.Arm64
|
||||||
VectorTernaryShlRd,
|
VectorTernaryShlRd,
|
||||||
VectorTernaryShrRd,
|
VectorTernaryShrRd,
|
||||||
|
|
||||||
Vector128Unary,
|
|
||||||
Vector128Binary,
|
|
||||||
|
|
||||||
GetRegister,
|
GetRegister,
|
||||||
SetRegister
|
SetRegister
|
||||||
}
|
}
|
||||||
|
|
|
@ -746,6 +746,7 @@ namespace ARMeilleure.CodeGen.Arm64
|
||||||
info.Type == IntrinsicType.ScalarTernaryFPRdByElem ||
|
info.Type == IntrinsicType.ScalarTernaryFPRdByElem ||
|
||||||
info.Type == IntrinsicType.ScalarTernaryShlRd ||
|
info.Type == IntrinsicType.ScalarTernaryShlRd ||
|
||||||
info.Type == IntrinsicType.ScalarTernaryShrRd ||
|
info.Type == IntrinsicType.ScalarTernaryShrRd ||
|
||||||
|
info.Type == IntrinsicType.Vector128BinaryRd ||
|
||||||
info.Type == IntrinsicType.VectorBinaryRd ||
|
info.Type == IntrinsicType.VectorBinaryRd ||
|
||||||
info.Type == IntrinsicType.VectorInsertByElem ||
|
info.Type == IntrinsicType.VectorInsertByElem ||
|
||||||
info.Type == IntrinsicType.VectorTernaryRd ||
|
info.Type == IntrinsicType.VectorTernaryRd ||
|
||||||
|
|
|
@ -17,7 +17,11 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
Operand res;
|
Operand res;
|
||||||
|
|
||||||
if (Optimizations.UseAesni)
|
if (Optimizations.UseArm64Aes)
|
||||||
|
{
|
||||||
|
res = context.AddIntrinsic(Intrinsic.Arm64AesdV, d, n);
|
||||||
|
}
|
||||||
|
else if (Optimizations.UseAesni)
|
||||||
{
|
{
|
||||||
res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
|
res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
|
||||||
}
|
}
|
||||||
|
@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
Operand res;
|
Operand res;
|
||||||
|
|
||||||
if (Optimizations.UseAesni)
|
if (Optimizations.UseArm64Aes)
|
||||||
|
{
|
||||||
|
res = context.AddIntrinsic(Intrinsic.Arm64AeseV, d, n);
|
||||||
|
}
|
||||||
|
else if (Optimizations.UseAesni)
|
||||||
{
|
{
|
||||||
res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
|
res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
|
||||||
}
|
}
|
||||||
|
@ -58,7 +66,11 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
Operand res;
|
Operand res;
|
||||||
|
|
||||||
if (Optimizations.UseAesni)
|
if (Optimizations.UseArm64Aes)
|
||||||
|
{
|
||||||
|
res = context.AddIntrinsic(Intrinsic.Arm64AesimcV, n);
|
||||||
|
}
|
||||||
|
else if (Optimizations.UseAesni)
|
||||||
{
|
{
|
||||||
res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
|
res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
|
||||||
}
|
}
|
||||||
|
@ -78,7 +90,11 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
Operand res;
|
Operand res;
|
||||||
|
|
||||||
if (Optimizations.UseAesni)
|
if (Optimizations.UseArm64Aes)
|
||||||
|
{
|
||||||
|
res = context.AddIntrinsic(Intrinsic.Arm64AesmcV, n);
|
||||||
|
}
|
||||||
|
else if (Optimizations.UseAesni)
|
||||||
{
|
{
|
||||||
Operand roundKey = context.VectorZero();
|
Operand roundKey = context.VectorZero();
|
||||||
|
|
||||||
|
|
|
@ -17,7 +17,11 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
Operand res;
|
Operand res;
|
||||||
|
|
||||||
if (Optimizations.UseAesni)
|
if (Optimizations.UseArm64Aes)
|
||||||
|
{
|
||||||
|
res = context.AddIntrinsic(Intrinsic.Arm64AesdV, d, n);
|
||||||
|
}
|
||||||
|
else if (Optimizations.UseAesni)
|
||||||
{
|
{
|
||||||
res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
|
res = context.AddIntrinsic(Intrinsic.X86Aesdeclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
|
||||||
}
|
}
|
||||||
|
@ -38,7 +42,11 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
Operand res;
|
Operand res;
|
||||||
|
|
||||||
if (Optimizations.UseAesni)
|
if (Optimizations.UseArm64Aes)
|
||||||
|
{
|
||||||
|
res = context.AddIntrinsic(Intrinsic.Arm64AeseV, d, n);
|
||||||
|
}
|
||||||
|
else if (Optimizations.UseAesni)
|
||||||
{
|
{
|
||||||
res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
|
res = context.AddIntrinsic(Intrinsic.X86Aesenclast, context.AddIntrinsic(Intrinsic.X86Xorpd, d, n), context.VectorZero());
|
||||||
}
|
}
|
||||||
|
@ -58,7 +66,11 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
Operand res;
|
Operand res;
|
||||||
|
|
||||||
if (Optimizations.UseAesni)
|
if (Optimizations.UseArm64Aes)
|
||||||
|
{
|
||||||
|
res = context.AddIntrinsic(Intrinsic.Arm64AesimcV, n);
|
||||||
|
}
|
||||||
|
else if (Optimizations.UseAesni)
|
||||||
{
|
{
|
||||||
res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
|
res = context.AddIntrinsic(Intrinsic.X86Aesimc, n);
|
||||||
}
|
}
|
||||||
|
@ -78,7 +90,11 @@ namespace ARMeilleure.Instructions
|
||||||
|
|
||||||
Operand res;
|
Operand res;
|
||||||
|
|
||||||
if (Optimizations.UseAesni)
|
if (Optimizations.UseArm64Aes)
|
||||||
|
{
|
||||||
|
res = context.AddIntrinsic(Intrinsic.Arm64AesmcV, n);
|
||||||
|
}
|
||||||
|
else if (Optimizations.UseAesni)
|
||||||
{
|
{
|
||||||
Operand roundKey = context.VectorZero();
|
Operand roundKey = context.VectorZero();
|
||||||
|
|
||||||
|
|
|
@ -13,6 +13,7 @@ namespace ARMeilleure
|
||||||
public static bool UseUnmanagedDispatchLoop { get; set; } = true;
|
public static bool UseUnmanagedDispatchLoop { get; set; } = true;
|
||||||
|
|
||||||
public static bool UseAdvSimdIfAvailable { get; set; } = true;
|
public static bool UseAdvSimdIfAvailable { get; set; } = true;
|
||||||
|
public static bool UseArm64AesIfAvailable { get; set; } = true;
|
||||||
public static bool UseArm64PmullIfAvailable { get; set; } = true;
|
public static bool UseArm64PmullIfAvailable { get; set; } = true;
|
||||||
|
|
||||||
public static bool UseSseIfAvailable { get; set; } = true;
|
public static bool UseSseIfAvailable { get; set; } = true;
|
||||||
|
@ -41,6 +42,7 @@ namespace ARMeilleure
|
||||||
}
|
}
|
||||||
|
|
||||||
internal static bool UseAdvSimd => UseAdvSimdIfAvailable && Arm64HardwareCapabilities.SupportsAdvSimd;
|
internal static bool UseAdvSimd => UseAdvSimdIfAvailable && Arm64HardwareCapabilities.SupportsAdvSimd;
|
||||||
|
internal static bool UseArm64Aes => UseArm64AesIfAvailable && Arm64HardwareCapabilities.SupportsAes;
|
||||||
internal static bool UseArm64Pmull => UseArm64PmullIfAvailable && Arm64HardwareCapabilities.SupportsPmull;
|
internal static bool UseArm64Pmull => UseArm64PmullIfAvailable && Arm64HardwareCapabilities.SupportsPmull;
|
||||||
|
|
||||||
internal static bool UseSse => UseSseIfAvailable && X86HardwareCapabilities.SupportsSse;
|
internal static bool UseSse => UseSseIfAvailable && X86HardwareCapabilities.SupportsSse;
|
||||||
|
|
|
@ -30,7 +30,7 @@ namespace ARMeilleure.Translation.PTC
|
||||||
private const string OuterHeaderMagicString = "PTCohd\0\0";
|
private const string OuterHeaderMagicString = "PTCohd\0\0";
|
||||||
private const string InnerHeaderMagicString = "PTCihd\0\0";
|
private const string InnerHeaderMagicString = "PTCihd\0\0";
|
||||||
|
|
||||||
private const uint InternalVersion = 4661; //! To be incremented manually for each change to the ARMeilleure project.
|
private const uint InternalVersion = 5281; //! To be incremented manually for each change to the ARMeilleure project.
|
||||||
|
|
||||||
private const string ActualDir = "0";
|
private const string ActualDir = "0";
|
||||||
private const string BackupDir = "1";
|
private const string BackupDir = "1";
|
||||||
|
|
Loading…
Reference in a new issue