FastSalsa20Engine.cs 18 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560
  1. #if !BESTHTTP_DISABLE_ALTERNATE_SSL && (!UNITY_WEBGL || UNITY_EDITOR)
  2. #pragma warning disable
  3. using System;
  4. #if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER || UNITY_2021_2_OR_NEWER
  5. using System.Runtime.CompilerServices;
  6. using System.Runtime.InteropServices;
  7. #endif
  8. #if NETCOREAPP3_0_OR_GREATER
  9. using System.Runtime.InteropServices;
  10. using System.Runtime.Intrinsics;
  11. using System.Runtime.Intrinsics.X86;
  12. #endif
  13. using Best.HTTP.SecureProtocol.Org.BouncyCastle.Crypto;
  14. using Best.HTTP.SecureProtocol.Org.BouncyCastle.Crypto.Parameters;
  15. using Best.HTTP.SecureProtocol.Org.BouncyCastle.Crypto.Utilities;
  16. using Best.HTTP.SecureProtocol.Org.BouncyCastle.Utilities;
  17. namespace Best.HTTP.Shared.TLS.Crypto.Impl
  18. {
  19. /// <summary>
  20. /// Implementation of Daniel J. Bernstein's Salsa20 stream cipher, Snuffle 2005
  21. /// </summary>
  22. [Best.HTTP.Shared.PlatformSupport.IL2CPP.Il2CppEagerStaticClassConstructionAttribute]
  23. public class FastSalsa20Engine
  24. : IStreamCipher
  25. {
  /// <summary>Round count used by the parameterless constructor.</summary>
  public static readonly int DEFAULT_ROUNDS = 20;

  /// <summary>Number of 32-bit words in the cipher state (16 words = 64 bytes).</summary>
  private const int StateSize = 16;

  /// <summary>
  /// Both expansion strings packed back to back as eight words:
  /// indices 0-3 come from "expand 16-byte k", indices 4-7 from "expand 32-byte k".
  /// </summary>
  private static readonly uint[] TAU_SIGMA = Pack.LE_To_UInt32(
      Strings.ToAsciiByteArray("expand 16-byte k" + "expand 32-byte k"),
      0,
      8);
  /// <summary>
  /// Copies the four expansion-constant words that belong to the given key
  /// length into four consecutive slots of <paramref name="state"/>.
  /// </summary>
  /// <param name="keyLength">Key length in bytes; must be 16 or 32.</param>
  /// <param name="state">Destination word array.</param>
  /// <param name="stateOffset">Index of the first of the four destination slots.</param>
  /// <exception cref="ArgumentException">
  /// <paramref name="keyLength"/> is neither 16 nor 32.
  /// </exception>
  internal void PackTauOrSigma(int keyLength, uint[] state, int stateOffset)
  {
      // Only 16 and 32 map onto the start of one of the two packed constants.
      // Any other value would pick words straddling both constants or index
      // outside TAU_SIGMA, so reject it instead of producing a wrong state.
      if (keyLength != 16 && keyLength != 32)
          throw new ArgumentException(AlgorithmName + " requires 128 bit or 256 bit key", "keyLength");

      int tsOff = (keyLength - 16) / 4; // 0 for 16-byte keys, 4 for 32-byte keys

      for (int n = 0; n < 4; n++)
      {
          state[stateOffset + n] = TAU_SIGMA[tsOff + n];
      }
  }
  /// <summary>Number of rounds applied per key stream block; assigned by the constructor.</summary>
  protected int rounds;

  // ---- Working state of the engine during encryption and decryption ----

  /// <summary>Read position inside <see cref="keyStream"/>; 0 triggers generation of a new block.</summary>
  internal int index;

  /// <summary>The 16-word cipher state (constants, key, IV and block counter).</summary>
  internal uint[] engineState = new uint[StateSize];

  /// <summary>Scratch buffer that receives the output words of the core function.</summary>
  internal uint[] x = new uint[StateSize];

  /// <summary>Current key stream block as bytes (64 bytes).</summary>
  internal byte[] keyStream = new byte[StateSize * 4];

  /// <summary>Set once <c>Init</c> has completed successfully.</summary>
  internal bool initialised;

  // ---- Internal counter: three 32-bit words counting processed bytes ----
  private uint cW0, cW1, cW2;
  /// <summary>
  /// Builds an engine that uses <see cref="DEFAULT_ROUNDS"/> rounds.
  /// </summary>
  public FastSalsa20Engine()
      : this(DEFAULT_ROUNDS)
  {
  }

  /// <summary>
  /// Builds an engine that uses the requested number of rounds.
  /// </summary>
  /// <param name="rounds">Round count; has to be greater than zero and even.</param>
  /// <exception cref="ArgumentException">The round count is zero, negative or odd.</exception>
  public FastSalsa20Engine(int rounds)
  {
      bool isPositive = rounds > 0;
      bool isEven = (rounds & 1) == 0;

      if (!(isPositive && isEven))
          throw new ArgumentException("'rounds' must be a positive, even number");

      this.rounds = rounds;
  }
  /// <summary>
  /// Loads the IV (and optionally a key) and resets the engine.
  /// </summary>
  /// <param name="forEncryption">
  /// Not used: the same key stream XOR serves both directions.
  /// </param>
  /// <param name="parameters">
  /// A <c>ParametersWithIV</c> wrapping either a <c>KeyParameter</c> or
  /// <c>null</c>. <c>null</c> keeps the previously loaded key and is only
  /// accepted once the engine has been initialised.
  /// </param>
  /// <exception cref="ArgumentException">
  /// The parameters carry no IV, the IV has the wrong length, or the wrapped
  /// parameter is not a <c>KeyParameter</c>.
  /// </exception>
  /// <exception cref="InvalidOperationException">
  /// No key was supplied and the engine has never been initialised.
  /// </exception>
  public virtual void Init(bool forEncryption, ICipherParameters parameters)
  {
      if (!(parameters is ParametersWithIV))
          throw new ArgumentException(AlgorithmName + " Init requires an IV", "parameters");

      ParametersWithIV withIv = (ParametersWithIV)parameters;

      byte[] nonce = withIv.GetIV();
      if (nonce == null || nonce.Length != NonceSize)
          throw new ArgumentException(AlgorithmName + " requires exactly " + NonceSize + " bytes of IV");

      ICipherParameters wrapped = withIv.Parameters;
      byte[] keyBytes;

      if (wrapped is KeyParameter)
      {
          keyBytes = ((KeyParameter)wrapped).GetKey();
      }
      else if (wrapped != null)
      {
          throw new ArgumentException(AlgorithmName + " Init parameters must contain a KeyParameter (or null for re-init)");
      }
      else if (initialised)
      {
          // Re-initialisation with a fresh IV: SetKey leaves the key words alone.
          keyBytes = null;
      }
      else
      {
          throw new InvalidOperationException(AlgorithmName + " KeyParameter can not be null for first initialisation");
      }

      SetKey(keyBytes, nonce);
      Reset();
      initialised = true;
  }
  /// <summary>IV length in bytes that <c>Init</c> insists on.</summary>
  protected virtual int NonceSize => 8;
  /// <summary>
  /// "Salsa20" for the default round count, otherwise "Salsa20/" followed by
  /// the configured number of rounds.
  /// </summary>
  public virtual string AlgorithmName
  {
      get
      {
          return rounds == DEFAULT_ROUNDS
              ? "Salsa20"
              : "Salsa20" + "/" + rounds;
      }
  }
  /// <summary>
  /// Encrypts or decrypts a single byte by XORing it with the next key stream byte.
  /// </summary>
  /// <param name="input">The byte to process.</param>
  /// <returns><paramref name="input"/> XORed with the current key stream byte.</returns>
  /// <exception cref="InvalidOperationException">The engine has not been initialised.</exception>
  /// <exception cref="MaxBytesExceededException">The per-IV byte limit has been reached.</exception>
  public virtual byte ReturnByte(byte input)
  {
      // Same guard as ProcessBytes: without it an un-keyed engine would
      // silently produce output derived from an all-zero state.
      if (!initialised)
          throw new InvalidOperationException(AlgorithmName + " not initialised");

      if (LimitExceeded())
          throw new MaxBytesExceededException("2^70 byte limit per IV; Change IV");

      // index == 0 means the previous 64-byte block is used up (or none exists yet).
      if (index == 0)
      {
          GenerateKeyStream(keyStream);
          AdvanceCounter();
      }

      byte output = (byte)(keyStream[index] ^ input);
      index = (index + 1) & 63; // wrap at the 64-byte block boundary
      return output;
  }
  /// <summary>
  /// Increments state word 8 and carries into state word 9 when word 8 wraps to zero.
  /// </summary>
  protected virtual void AdvanceCounter()
  {
      uint low = engineState[8] + 1;
      engineState[8] = low;

      if (low == 0)
      {
          engineState[9] = engineState[9] + 1;
      }
  }
  /// <summary>
  /// Encrypts or decrypts <paramref name="len"/> bytes by XORing them with the key stream.
  /// </summary>
  /// <param name="inBytes">Source buffer.</param>
  /// <param name="inOff">Offset of the first source byte.</param>
  /// <param name="len">Number of bytes to process.</param>
  /// <param name="outBytes">Destination buffer.</param>
  /// <param name="outOff">Offset of the first destination byte.</param>
  /// <exception cref="InvalidOperationException">The engine has not been initialised.</exception>
  /// <exception cref="MaxBytesExceededException">The per-IV byte limit would be exceeded.</exception>
  public unsafe virtual void ProcessBytes(
      byte[] inBytes,
      int inOff,
      int len,
      byte[] outBytes,
      int outOff)
  {
      if (!initialised)
          throw new InvalidOperationException(AlgorithmName + " not initialised");

      Check.DataLength(inBytes, inOff, len, "input buffer too short");
      Check.OutputLength(outBytes, outOff, len, "output buffer too short");

      if (LimitExceeded((uint)len))
          throw new MaxBytesExceededException("2^70 byte limit per IV would be exceeded; Change IV");

      for (int i = 0; i < len; i++)
      {
          if (index == 0)
          {
              GenerateKeyStream(keyStream);
              AdvanceCounter();

              // Fast path: a whole 64-byte key stream block can be consumed at once.
              if (len - i >= 64)
              {
                  fixed (byte* pbout = outBytes)
                  fixed (byte* pbin = inBytes)
                  fixed (byte* pbkey = keyStream)
                  {
  #if BESTHTTP_WITH_BURST
                      // NOTE(review): assumes the helper XORs one 64-byte block starting at
                      // the given offsets into pbout/pbin - confirm against FastSalsa20EngineHelper.
                      FastSalsa20EngineHelper.ProcessBytes(pbout, outOff + i, pbin, inOff + i, pbkey);
  #else
                      // The block starts i bytes into the caller's range, not at its beginning.
                      ulong* pulOut = (ulong*)&pbout[outOff + i];
                      ulong* pulIn = (ulong*)&pbin[inOff + i];
                      ulong* pulKeyStream = (ulong*)pbkey;

                      // 64 bytes = 8 ulongs; all of them have to be XORed
                      // because i is advanced by a full block below.
                      for (int w = 0; w < 8; w++)
                      {
                          pulOut[w] = pulKeyStream[w] ^ pulIn[w];
                      }
  #endif
                  }

                  i += 63; // the loop increment supplies the 64th byte
                  index = 0;
                  continue;
              }
          }

          // Slow path: one byte at a time (partial block at the start or the tail).
          outBytes[i + outOff] = (byte)(keyStream[index] ^ inBytes[i + inOff]);
          index = (index + 1) & 63;
      }
  }
  #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER || UNITY_2021_2_OR_NEWER
  /// <summary>
  /// Encrypts or decrypts <paramref name="input"/> into <paramref name="output"/>
  /// by XORing it with the key stream.
  /// </summary>
  /// <param name="input">Bytes to process.</param>
  /// <param name="output">Destination; must be at least as long as <paramref name="input"/>.</param>
  /// <exception cref="InvalidOperationException">The engine has not been initialised.</exception>
  /// <exception cref="MaxBytesExceededException">The per-IV byte limit would be exceeded.</exception>
  public virtual void ProcessBytes(ReadOnlySpan<byte> input, Span<byte> output)
  {
      if (!initialised)
          throw new InvalidOperationException(AlgorithmName + " not initialised");

      Check.OutputLength(output, input.Length, "output buffer too short");

      if (LimitExceeded((uint)input.Length))
          throw new MaxBytesExceededException("2^70 byte limit per IV would be exceeded; Change IV");

      for (int i = 0; i < input.Length; i++)
      {
          if (index == 0)
          {
              GenerateKeyStream(keyStream);
              AdvanceCounter();

              // Fast path: a whole 64-byte key stream block can be consumed at once.
              if (input.Length - i >= 64)
              {
                  Span<ulong> lOutput = MemoryMarshal.Cast<byte, ulong>(output.Slice(i, 64));
                  ReadOnlySpan<ulong> lKeyStream = MemoryMarshal.Cast<byte, ulong>(keyStream);
                  ReadOnlySpan<ulong> lInput = MemoryMarshal.Cast<byte, ulong>(input.Slice(i, 64));

                  // 64 bytes = 8 ulongs; all of them have to be XORed
                  // because i is advanced by a full block below.
                  for (int w = 0; w < 8; w++)
                  {
                      lOutput[w] = lKeyStream[w] ^ lInput[w];
                  }

                  i += 63; // the loop increment supplies the 64th byte
                  index = 0;
                  continue;
              }
          }

          // Slow path: one byte at a time (partial block at the start or the tail).
          output[i] = (byte)(keyStream[index++] ^ input[i]);
          index &= 63;
      }
  }
  #endif
  /// <summary>
  /// Rewinds the key stream position and clears both the processed-byte
  /// limit counter and the block counter. Key and IV words are not touched.
  /// </summary>
  public virtual void Reset()
  {
      this.index = 0;
      this.ResetLimitCounter();
      this.ResetCounter();
  }
  /// <summary>Zeroes state words 8 and 9, the words stepped by <c>AdvanceCounter</c>.</summary>
  protected virtual void ResetCounter()
  {
      engineState[9] = 0;
      engineState[8] = 0;
  }
  /// <summary>
  /// Writes the key (when given) and the IV into the engine state.
  /// </summary>
  /// <param name="keyBytes">
  /// 16 or 32 key bytes, or <c>null</c> to keep the key words already in the state.
  /// </param>
  /// <param name="ivBytes">IV bytes; the first two words go into state words 6 and 7.</param>
  /// <exception cref="ArgumentException">The key is neither 16 nor 32 bytes long.</exception>
  protected virtual void SetKey(byte[] keyBytes, byte[] ivBytes)
  {
      if (keyBytes != null)
      {
          int keyLen = keyBytes.Length;
          if (!(keyLen == 16 || keyLen == 32))
              throw new ArgumentException(AlgorithmName + " requires 128 bit or 256 bit key");

          // The four constant words land on state slots 0, 5, 10 and 15.
          int constantsOff = (keyLen - 16) / 4;
          for (int n = 0; n < 4; n++)
          {
              engineState[5 * n] = TAU_SIGMA[constantsOff + n];
          }

          // Key: first 16 bytes -> words 1..4, last 16 bytes -> words 11..14
          // (for a 16-byte key both ranges read the same bytes).
          Pack.LE_To_UInt32(keyBytes, 0, engineState, 1, 4);
          Pack.LE_To_UInt32(keyBytes, keyLen - 16, engineState, 11, 4);
      }

      // IV -> words 6..7
      Pack.LE_To_UInt32(ivBytes, 0, engineState, 6, 2);
  }
  /// <summary>
  /// Runs the core function over the current engine state and serialises the
  /// resulting words into <paramref name="output"/> starting at offset 0.
  /// </summary>
  /// <param name="output">Receives the key stream block.</param>
  protected virtual void GenerateKeyStream(byte[] output)
  {
      SalsaCore(this.rounds, this.engineState, this.x);
      Pack.UInt32_To_LE(this.x, output, 0);
  }
  #if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER || UNITY_2021_2_OR_NEWER
  /// <summary>
  /// Core function: mixes the 16 input words for the given number of rounds and
  /// writes <c>mixed[k] + input[k]</c> to <paramref name="output"/>.
  /// </summary>
  /// <param name="rounds">Number of rounds; must be even.</param>
  /// <param name="input">At least 16 state words.</param>
  /// <param name="output">Receives 16 result words.</param>
  /// <exception cref="ArgumentException">
  /// A span holds fewer than 16 words or <paramref name="rounds"/> is odd.
  /// </exception>
  internal static void SalsaCore(int rounds, ReadOnlySpan<uint> input, Span<uint> output)
  {
      if (input.Length < 16)
          throw new ArgumentException();
      if (output.Length < 16)
          throw new ArgumentException();
      if ((rounds % 2) != 0)
          throw new ArgumentException("Number of rounds must be even");

  #if NETCOREAPP3_0_OR_GREATER
      if (Sse41.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<short>>() == 16)
      {
          // Load the 16 words as four 128-bit rows and regroup them with
          // 16-bit-lane blends into the layout QuarterRound_Sse2 operates on.
          ReadOnlySpan<byte> inBytes = MemoryMarshal.AsBytes(input.Slice(0, 16));
          Vector128<short> in0 = MemoryMarshal.Read<Vector128<short>>(inBytes.Slice(0, 16));
          Vector128<short> in1 = MemoryMarshal.Read<Vector128<short>>(inBytes.Slice(16, 16));
          Vector128<short> in2 = MemoryMarshal.Read<Vector128<short>>(inBytes.Slice(32, 16));
          Vector128<short> in3 = MemoryMarshal.Read<Vector128<short>>(inBytes.Slice(48, 16));

          Vector128<short> mix0 = Sse41.Blend(in0, in2, 0xF0);
          Vector128<short> mix1 = Sse41.Blend(in1, in3, 0xC3);
          Vector128<short> mix2 = Sse41.Blend(in0, in2, 0x0F);
          Vector128<short> mix3 = Sse41.Blend(in1, in3, 0x3C);

          Vector128<uint> base0 = Sse41.Blend(mix0, mix1, 0xCC).AsUInt32();
          Vector128<uint> base1 = Sse41.Blend(mix0, mix1, 0x33).AsUInt32();
          Vector128<uint> base2 = Sse41.Blend(mix2, mix3, 0xCC).AsUInt32();
          Vector128<uint> base3 = Sse41.Blend(mix2, mix3, 0x33).AsUInt32();

          Vector128<uint> work0 = base0;
          Vector128<uint> work1 = base1;
          Vector128<uint> work2 = base2;
          Vector128<uint> work3 = base3;

          // Two quarter-round passes per iteration, i.e. one double round.
          for (int r = 0; r < rounds; r += 2)
          {
              QuarterRound_Sse2(ref work0, ref work3, ref work2, ref work1);
              QuarterRound_Sse2(ref work0, ref work1, ref work2, ref work3);
          }

          // Feed-forward addition, then undo the regrouping with the mirrored blends.
          Vector128<ushort> sum0 = Sse2.Add(base0, work0).AsUInt16();
          Vector128<ushort> sum1 = Sse2.Add(base1, work1).AsUInt16();
          Vector128<ushort> sum2 = Sse2.Add(base2, work2).AsUInt16();
          Vector128<ushort> sum3 = Sse2.Add(base3, work3).AsUInt16();

          Vector128<ushort> un0 = Sse41.Blend(sum0, sum1, 0xCC);
          Vector128<ushort> un1 = Sse41.Blend(sum0, sum1, 0x33);
          Vector128<ushort> un2 = Sse41.Blend(sum2, sum3, 0xCC);
          Vector128<ushort> un3 = Sse41.Blend(sum2, sum3, 0x33);

          Vector128<ushort> out0 = Sse41.Blend(un0, un2, 0xF0);
          Vector128<ushort> out1 = Sse41.Blend(un1, un3, 0xC3);
          Vector128<ushort> out2 = Sse41.Blend(un0, un2, 0x0F);
          Vector128<ushort> out3 = Sse41.Blend(un1, un3, 0x3C);

          Span<byte> outBytes = MemoryMarshal.AsBytes(output.Slice(0, 16));
          MemoryMarshal.Write(outBytes.Slice(0, 16), ref out0);
          MemoryMarshal.Write(outBytes.Slice(16, 16), ref out1);
          MemoryMarshal.Write(outBytes.Slice(32, 16), ref out2);
          MemoryMarshal.Write(outBytes.Slice(48, 16), ref out3);
          return;
      }
  #endif

      // Scalar path: work on local copies of the 16 words.
      uint s0 = input[0], s1 = input[1], s2 = input[2], s3 = input[3];
      uint s4 = input[4], s5 = input[5], s6 = input[6], s7 = input[7];
      uint s8 = input[8], s9 = input[9], s10 = input[10], s11 = input[11];
      uint s12 = input[12], s13 = input[13], s14 = input[14], s15 = input[15];

      for (int r = 0; r < rounds; r += 2)
      {
          // first half of the double round
          QuarterRound(ref s0, ref s4, ref s8, ref s12);
          QuarterRound(ref s5, ref s9, ref s13, ref s1);
          QuarterRound(ref s10, ref s14, ref s2, ref s6);
          QuarterRound(ref s15, ref s3, ref s7, ref s11);

          // second half of the double round
          QuarterRound(ref s0, ref s1, ref s2, ref s3);
          QuarterRound(ref s5, ref s6, ref s7, ref s4);
          QuarterRound(ref s10, ref s11, ref s8, ref s9);
          QuarterRound(ref s15, ref s12, ref s13, ref s14);
      }

      // Feed-forward: add the untouched input words to the mixed words.
      output[0] = s0 + input[0];
      output[1] = s1 + input[1];
      output[2] = s2 + input[2];
      output[3] = s3 + input[3];
      output[4] = s4 + input[4];
      output[5] = s5 + input[5];
      output[6] = s6 + input[6];
      output[7] = s7 + input[7];
      output[8] = s8 + input[8];
      output[9] = s9 + input[9];
      output[10] = s10 + input[10];
      output[11] = s11 + input[11];
      output[12] = s12 + input[12];
      output[13] = s13 + input[13];
      output[14] = s14 + input[14];
      output[15] = s15 + input[15];
  }
  #else
  /// <summary>
  /// Core function: mixes the 16 input words for the given number of rounds and
  /// writes <c>mixed[k] + input[k]</c> to <paramref name="output"/>.
  /// </summary>
  /// <param name="rounds">Number of rounds; must be even.</param>
  /// <param name="input">At least 16 state words.</param>
  /// <param name="output">Receives 16 result words.</param>
  /// <exception cref="ArgumentException">
  /// An array holds fewer than 16 words or <paramref name="rounds"/> is odd.
  /// </exception>
  internal static void SalsaCore(int rounds, uint[] input, uint[] output)
  {
      if (input.Length < 16)
          throw new ArgumentException();
      if (output.Length < 16)
          throw new ArgumentException();
      if ((rounds % 2) != 0)
          throw new ArgumentException("Number of rounds must be even");

      // Work on local copies of the 16 words.
      uint s0 = input[0], s1 = input[1], s2 = input[2], s3 = input[3];
      uint s4 = input[4], s5 = input[5], s6 = input[6], s7 = input[7];
      uint s8 = input[8], s9 = input[9], s10 = input[10], s11 = input[11];
      uint s12 = input[12], s13 = input[13], s14 = input[14], s15 = input[15];

      for (int r = 0; r < rounds; r += 2)
      {
          // first half of the double round
          QuarterRound(ref s0, ref s4, ref s8, ref s12);
          QuarterRound(ref s5, ref s9, ref s13, ref s1);
          QuarterRound(ref s10, ref s14, ref s2, ref s6);
          QuarterRound(ref s15, ref s3, ref s7, ref s11);

          // second half of the double round
          QuarterRound(ref s0, ref s1, ref s2, ref s3);
          QuarterRound(ref s5, ref s6, ref s7, ref s4);
          QuarterRound(ref s10, ref s11, ref s8, ref s9);
          QuarterRound(ref s15, ref s12, ref s13, ref s14);
      }

      // Feed-forward: add the untouched input words to the mixed words.
      output[0] = s0 + input[0];
      output[1] = s1 + input[1];
      output[2] = s2 + input[2];
      output[3] = s3 + input[3];
      output[4] = s4 + input[4];
      output[5] = s5 + input[5];
      output[6] = s6 + input[6];
      output[7] = s7 + input[7];
      output[8] = s8 + input[8];
      output[9] = s9 + input[9];
      output[10] = s10 + input[10];
      output[11] = s11 + input[11];
      output[12] = s12 + input[12];
      output[13] = s13 + input[13];
      output[14] = s14 + input[14];
      output[15] = s15 + input[15];
  }
  #endif
  /// <summary>Clears the three-word counter of processed bytes.</summary>
  internal void ResetLimitCounter()
  {
      cW0 = cW1 = cW2 = 0;
  }
  /// <summary>
  /// Counts one more processed byte and reports whether the per-IV limit is hit.
  /// </summary>
  /// <returns>
  /// <c>true</c> only when the increment carries through both low words and
  /// bit 5 of the top word is then set.
  /// </returns>
  internal bool LimitExceeded()
  {
      if (++cW0 != 0)
          return false; // no carry out of the low word

      if (++cW1 != 0)
          return false; // no carry out of the middle word

      return (++cW2 & 0x20) != 0; // 2^(32 + 32 + 6)
  }
  /// <summary>
  /// Adds <paramref name="len"/> processed bytes to the counter and reports
  /// whether the per-IV limit is hit.
  /// </summary>
  /// <remarks>
  /// Relies on <paramref name="len"/> always being positive: a carry out of
  /// the low word is detected by the sum ending up smaller than the old value.
  /// </remarks>
  /// <param name="len">Number of bytes about to be processed.</param>
  /// <returns>
  /// <c>true</c> only when the addition carries through both low words and
  /// bit 5 of the top word is then set.
  /// </returns>
  internal bool LimitExceeded(uint len)
  {
      uint before = cW0;
      cW0 = before + len;

      bool wrapped = cW0 < before;
      if (!wrapped)
          return false;

      if (++cW1 != 0)
          return false;

      return (++cW2 & 0x20) != 0; // 2^(32 + 32 + 6)
  }
  #if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER || UNITY_2021_2_OR_NEWER
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  #endif
  /* Scalar quarter round: each word is XORed with a left rotation (by 7, 9, 13
   * and 18 bits, in that order) of the sum of the two words updated before it. */
  private static void QuarterRound(ref uint a, ref uint b, ref uint c, ref uint d)
  {
      // Every call site in this class passes four distinct locals, so working
      // on copies and storing them back yields the same result as updating in place.
      uint wa = a, wb = b, wc = c, wd = d;

      wb ^= Integers.RotateLeft(wa + wd, 7);
      wc ^= Integers.RotateLeft(wb + wa, 9);
      wd ^= Integers.RotateLeft(wc + wb, 13);
      wa ^= Integers.RotateLeft(wd + wc, 18);

      a = wa;
      b = wb;
      c = wc;
      d = wd;
  }
  #if NETCOREAPP3_0_OR_GREATER
  /// <summary>
  /// Vector quarter round: applies the 7/9/13/18 rotate-add-xor steps to all
  /// four 32-bit lanes at once, then shuffles the lanes of b, c and d.
  /// </summary>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  private static void QuarterRound_Sse2(ref Vector128<uint> a, ref Vector128<uint> b, ref Vector128<uint> c,
      ref Vector128<uint> d)
  {
      Vector128<uint> sumAD = Sse2.Add(a, d);
      b = Sse2.Xor(b, Rotate_Sse2(sumAD, 7));

      Vector128<uint> sumBA = Sse2.Add(b, a);
      c = Sse2.Xor(c, Rotate_Sse2(sumBA, 9));

      Vector128<uint> sumCB = Sse2.Add(c, b);
      d = Sse2.Xor(d, Rotate_Sse2(sumCB, 13));

      Vector128<uint> sumDC = Sse2.Add(d, c);
      a = Sse2.Xor(a, Rotate_Sse2(sumDC, 18));

      // Lane permutations applied after the arithmetic.
      b = Sse2.Shuffle(b, 0x93);
      c = Sse2.Shuffle(c, 0x4E);
      d = Sse2.Shuffle(d, 0x39);
  }

  /// <summary>
  /// Rotates every 32-bit lane of <paramref name="x"/> left by <paramref name="sl"/> bits.
  /// </summary>
  [MethodImpl(MethodImplOptions.AggressiveInlining)]
  private static Vector128<uint> Rotate_Sse2(Vector128<uint> x, byte sl)
  {
      byte sr = (byte)(32 - sl);
      Vector128<uint> high = Sse2.ShiftLeftLogical(x, sl);
      Vector128<uint> low = Sse2.ShiftRightLogical(x, sr);
      return Sse2.Xor(high, low);
  }
  #endif
  480. }
  481. }
  482. #pragma warning restore
  483. #endif