FastAesEngineHelper.cs 7.4 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195
  1. #if !BESTHTTP_DISABLE_ALTERNATE_SSL && (!UNITY_WEBGL || UNITY_EDITOR)
  2. using System;
  3. using Best.HTTP.SecureProtocol.Org.BouncyCastle.Crypto.Utilities;
  4. namespace Best.HTTP.Shared.TLS.Crypto.Impl
  5. {
  /// <summary>
  /// Pointer-based single-block encryption helper. All lookup tables and round keys are
  /// supplied by the caller; this class holds no state of its own.
  /// </summary>
  internal static class FastAesEngineHelper
  {
      /// <summary>
      /// Encrypts the 16 bytes found at <paramref name="input"/>[<paramref name="inOff"/>..] and
      /// stores the 16 result bytes at <paramref name="output"/>[<paramref name="outOff"/>..].
      /// </summary>
      /// <param name="input">Source buffer; four 32-bit words are read via <c>Pack.LE_To_UInt32</c>.</param>
      /// <param name="inOff">Offset of the first byte to read from <paramref name="input"/>.</param>
      /// <param name="output">Destination buffer; four 32-bit words are written via <c>Pack.UInt32_To_LE</c>.</param>
      /// <param name="outOff">Offset of the first byte to write in <paramref name="output"/>.</param>
      /// <param name="KW">
      /// Round keys, one array per round. The first four words of each array are used. For an even
      /// <paramref name="ROUNDS"/> of at least 2, entries 0 through <paramref name="ROUNDS"/> are consumed.
      /// </param>
      /// <param name="ROUNDS">Round count that controls how many table rounds are executed.</param>
      /// <param name="T0">
      /// Round lookup table. It is indexed with byte values through an unchecked pointer, so it has to
      /// provide at least 256 entries.
      /// </param>
      /// <param name="S">Byte table used by the last round; indexed with byte values (at least 256 entries).</param>
      /// <param name="s">
      /// Second byte table used by the last round, indexed the same way as <paramref name="S"/>.
      /// NOTE(review): presumably a separate copy of the same table - confirm against the caller.
      /// </param>
      /// <remarks>
      /// No argument validation is performed. Apart from <c>KW[0]</c>, the tables and round keys are
      /// read through pinned pointers without bounds checks.
      /// </remarks>
      public unsafe static void EncryptBlock(byte[] input, int inOff, byte[] output, int outOff, uint[][] KW, int ROUNDS, uint[] T0, byte[] S, byte[] s)
      {
          uint word0 = Pack.LE_To_UInt32(input, inOff + 0);
          uint word1 = Pack.LE_To_UInt32(input, inOff + 4);
          uint word2 = Pack.LE_To_UInt32(input, inOff + 8);
          uint word3 = Pack.LE_To_UInt32(input, inOff + 12);

          // Initial key addition with round key 0 (regular, bounds-checked array access).
          uint[] firstKey = KW[0];
          uint x0 = word0 ^ firstKey[0];
          uint x1 = word1 ^ firstKey[1];
          uint x2 = word2 ^ firstKey[2];
          uint x3 = word3 ^ firstKey[3];

          // The state ping-pongs between x0..x3 and y0..y3 from one round to the next.
          uint y0, y1, y2, y3;
          int round = 1;

          fixed (uint* table = T0)
          {
              // Two table rounds per iteration: x -> y, then y -> x.
              while (round < ROUNDS - 1)
              {
                  fixed (uint* rk = KW[round])
                  {
                      y0 = TableRoundWord(table, x0, x1, x2, x3, rk[0]);
                      y1 = TableRoundWord(table, x1, x2, x3, x0, rk[1]);
                      y2 = TableRoundWord(table, x2, x3, x0, x1, rk[2]);
                      y3 = TableRoundWord(table, x3, x0, x1, x2, rk[3]);
                  }

                  fixed (uint* rk = KW[round + 1])
                  {
                      x0 = TableRoundWord(table, y0, y1, y2, y3, rk[0]);
                      x1 = TableRoundWord(table, y1, y2, y3, y0, rk[1]);
                      x2 = TableRoundWord(table, y2, y3, y0, y1, rk[2]);
                      x3 = TableRoundWord(table, y3, y0, y1, y2, rk[3]);
                  }

                  round += 2;
              }

              // One more table round (x -> y) so the last round always starts from y0..y3.
              fixed (uint* rk = KW[round])
              {
                  y0 = TableRoundWord(table, x0, x1, x2, x3, rk[0]);
                  y1 = TableRoundWord(table, x1, x2, x3, x0, rk[1]);
                  y2 = TableRoundWord(table, x2, x3, x0, x1, rk[2]);
                  y3 = TableRoundWord(table, x3, x0, x1, x2, rk[3]);
              }
              round++;

              // The last round works directly on the byte tables instead of a dedicated set of
              // 32-bit tables. Which of the two byte tables serves each byte position is fixed
              // per output word and must not be changed.
              uint[] lastKey = KW[round];
              fixed (byte* primary = S, secondary = s)
              fixed (uint* rk = lastKey)
              {
                  word0 = (uint)primary[y0 & 0xFF]
                        ^ ((uint)primary[(y1 >> 8) & 0xFF] << 8)
                        ^ ((uint)secondary[(y2 >> 16) & 0xFF] << 16)
                        ^ ((uint)secondary[y3 >> 24] << 24)
                        ^ rk[0];

                  word1 = (uint)secondary[y1 & 0xFF]
                        ^ ((uint)primary[(y2 >> 8) & 0xFF] << 8)
                        ^ ((uint)primary[(y3 >> 16) & 0xFF] << 16)
                        ^ ((uint)secondary[y0 >> 24] << 24)
                        ^ rk[1];

                  word2 = (uint)secondary[y2 & 0xFF]
                        ^ ((uint)primary[(y3 >> 8) & 0xFF] << 8)
                        ^ ((uint)primary[(y0 >> 16) & 0xFF] << 16)
                        ^ ((uint)primary[y1 >> 24] << 24)
                        ^ rk[2];

                  word3 = (uint)secondary[y3 & 0xFF]
                        ^ ((uint)secondary[(y0 >> 8) & 0xFF] << 8)
                        ^ ((uint)secondary[(y1 >> 16) & 0xFF] << 16)
                        ^ ((uint)primary[y2 >> 24] << 24)
                        ^ rk[3];
              }
          }

          Pack.UInt32_To_LE(word0, output, outOff + 0);
          Pack.UInt32_To_LE(word1, output, outOff + 4);
          Pack.UInt32_To_LE(word2, output, outOff + 8);
          Pack.UInt32_To_LE(word3, output, outOff + 12);
      }

      /// <summary>
      /// Produces one output word of a table round: the table entry selected by byte 0 of
      /// <paramref name="w0"/>, XORed with the entries selected by byte 1 of <paramref name="w1"/>,
      /// byte 2 of <paramref name="w2"/> and byte 3 of <paramref name="w3"/> rotated left by
      /// 8, 16 and 24 bits respectively, and finally XORed with <paramref name="roundKeyWord"/>.
      /// </summary>
      [global::System.Runtime.CompilerServices.MethodImpl(global::System.Runtime.CompilerServices.MethodImplOptions.AggressiveInlining)]
      private unsafe static uint TableRoundWord(uint* table, uint w0, uint w1, uint w2, uint w3, uint roundKeyWord)
      {
          uint e1 = table[(w1 >> 8) & 0xFF];
          uint e2 = table[(w2 >> 16) & 0xFF];
          uint e3 = table[w3 >> 24];

          return table[w0 & 0xFF]
               ^ ((e1 >> 24) | (e1 << 8))
               ^ ((e2 >> 16) | (e2 << 16))
               ^ ((e3 >> 8) | (e3 << 24))
               ^ roundKeyWord;
      }
  }
  140. }
  141. #endif