// Salsa20Engine.cs
  1. #if !BESTHTTP_DISABLE_ALTERNATE_SSL && (!UNITY_WEBGL || UNITY_EDITOR)
  2. #pragma warning disable
  3. using System;
  4. #if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER || UNITY_2021_2_OR_NEWER
  5. using System.Runtime.CompilerServices;
  6. #endif
  7. #if NETCOREAPP3_0_OR_GREATER
  8. using System.Runtime.InteropServices;
  9. using System.Runtime.Intrinsics;
  10. using System.Runtime.Intrinsics.X86;
  11. #endif
  12. using Best.HTTP.SecureProtocol.Org.BouncyCastle.Crypto.Parameters;
  13. using Best.HTTP.SecureProtocol.Org.BouncyCastle.Crypto.Utilities;
  14. using Best.HTTP.SecureProtocol.Org.BouncyCastle.Utilities;
  15. namespace Best.HTTP.SecureProtocol.Org.BouncyCastle.Crypto.Engines
  16. {
  /// <summary>
  /// Implementation of Daniel J. Bernstein's Salsa20 stream cipher, Snuffle 2005
  /// </summary>
  /// <remarks>
  /// The engine keeps a 16-word state (<c>engineState</c>) laid out as:
  /// words 0, 5, 10, 15 = the "expand NN-byte k" constants, words 1-4 and 11-14 = key,
  /// words 6-7 = IV, words 8-9 = 64-bit block counter. Each keystream block is 64 bytes.
  /// Encryption and decryption are the same operation (XOR with the keystream).
  /// </remarks>
  public class Salsa20Engine
      : IStreamCipher
  {
      /// <summary>Number of rounds used by the parameterless constructor.</summary>
      public static readonly int DEFAULT_ROUNDS = 20;

      /** Constants */
      private const int StateSize = 16; // 16, 32 bit ints = 64 bytes

      // Eight little-endian words: the first four spell "expand 16-byte k" (tau, 128-bit keys),
      // the last four spell "expand 32-byte k" (sigma, 256-bit keys).
      private readonly static uint[] TAU_SIGMA = Pack.LE_To_UInt32(Strings.ToAsciiByteArray("expand 16-byte k" + "expand 32-byte k"), 0, 8);

      /// <summary>
      /// Copies the four constant words matching the key length into four consecutive
      /// slots of <paramref name="state"/>.
      /// </summary>
      /// <param name="keyLength">Key length in bytes; 16 selects tau, 32 selects sigma.</param>
      /// <param name="state">Destination word array.</param>
      /// <param name="stateOffset">Index of the first of the four destination words.</param>
      internal void PackTauOrSigma(int keyLength, uint[] state, int stateOffset)
      {
          // (16 - 16) / 4 == 0 -> tau words, (32 - 16) / 4 == 4 -> sigma words.
          int tsOff = (keyLength - 16) / 4;
          state[stateOffset] = TAU_SIGMA[tsOff];
          state[stateOffset + 1] = TAU_SIGMA[tsOff + 1];
          state[stateOffset + 2] = TAU_SIGMA[tsOff + 2];
          state[stateOffset + 3] = TAU_SIGMA[tsOff + 3];
      }

      /// <summary>Number of rounds applied per keystream block (positive and even).</summary>
      protected int rounds;

      /*
       * variables to hold the state of the engine
       * during encryption and decryption
       */
      internal int index = 0; // position of the next unused byte in keyStream (0..63)
      internal uint[] engineState = new uint[StateSize]; // state
      internal uint[] x = new uint[StateSize]; // internal buffer
      internal byte[] keyStream = new byte[StateSize * 4]; // expanded state, 64 bytes
      internal bool initialised = false;

      /*
       * internal counter: 96-bit count of bytes processed under the current IV
       * (cW0 is the least significant word)
       */
      private uint cW0, cW1, cW2;

      /// <summary>
      /// Creates a 20 round Salsa20 engine.
      /// </summary>
      public Salsa20Engine()
          : this(DEFAULT_ROUNDS)
      {
      }

      /// <summary>
      /// Creates a Salsa20 engine with a specific number of rounds.
      /// </summary>
      /// <param name="rounds">the number of rounds (must be an even number).</param>
      /// <exception cref="ArgumentException">If <paramref name="rounds"/> is not positive and even.</exception>
      public Salsa20Engine(int rounds)
      {
          if (rounds <= 0 || (rounds & 1) != 0)
          {
              throw new ArgumentException("'rounds' must be a positive, even number");
          }

          this.rounds = rounds;
      }

      /// <summary>
      /// Initialises (or re-initialises) the engine with a key and IV.
      /// </summary>
      /// <param name="forEncryption">Ignored; see the comment in the body.</param>
      /// <param name="parameters">
      /// A <c>ParametersWithIV</c> wrapping either a <c>KeyParameter</c>, or <c>null</c> to keep
      /// the previously supplied key and change only the IV.
      /// </param>
      /// <exception cref="ArgumentException">
      /// If no IV is supplied, the IV length differs from <see cref="NonceSize"/>, or the wrapped
      /// parameters are neither <c>null</c> nor a <c>KeyParameter</c>.
      /// </exception>
      /// <exception cref="InvalidOperationException">
      /// If the key is omitted on the very first initialisation.
      /// </exception>
      public virtual void Init(
          bool forEncryption,
          ICipherParameters parameters)
      {
          /*
           * Salsa20 encryption and decryption is completely
           * symmetrical, so the 'forEncryption' is
           * irrelevant. (Like 90% of stream ciphers)
           */
          ParametersWithIV ivParams = parameters as ParametersWithIV;
          if (ivParams == null)
              throw new ArgumentException(AlgorithmName + " Init requires an IV", nameof(parameters));

          byte[] iv = ivParams.GetIV();
          if (iv == null || iv.Length != NonceSize)
              throw new ArgumentException(AlgorithmName + " requires exactly " + NonceSize + " bytes of IV");

          ICipherParameters keyParam = ivParams.Parameters;
          KeyParameter key = keyParam as KeyParameter;
          if (keyParam == null)
          {
              // Re-keying with a null key is only legal once a key has been installed.
              if (!initialised)
                  throw new InvalidOperationException(AlgorithmName + " KeyParameter can not be null for first initialisation");

              SetKey(null, iv);
          }
          else if (key != null)
          {
              SetKey(key.GetKey(), iv);
          }
          else
          {
              throw new ArgumentException(AlgorithmName + " Init parameters must contain a KeyParameter (or null for re-init)");
          }

          Reset();
          initialised = true;
      }

      /// <summary>Required IV length in bytes.</summary>
      protected virtual int NonceSize
      {
          get { return 8; }
      }

      /// <summary>
      /// "Salsa20", with "/&lt;rounds&gt;" appended when the round count differs from
      /// <see cref="DEFAULT_ROUNDS"/>.
      /// </summary>
      public virtual string AlgorithmName
      {
          get
          {
              string name = "Salsa20";
              if (rounds != DEFAULT_ROUNDS)
              {
                  name += "/" + rounds;
              }
              return name;
          }
      }

      /// <summary>
      /// Encrypts/decrypts a single byte by XOR-ing it with the next keystream byte.
      /// </summary>
      /// <param name="input">The byte to process.</param>
      /// <returns>The processed byte.</returns>
      /// <exception cref="InvalidOperationException">If the engine has not been initialised.</exception>
      /// <exception cref="MaxBytesExceededException">If the per-IV byte limit has been reached.</exception>
      public virtual byte ReturnByte(
          byte input)
      {
          // Same guard as ProcessBytes: without it an un-keyed engine would emit a keystream
          // derived from the all-zero state. Checked first so the limit counter is not touched.
          if (!initialised)
              throw new InvalidOperationException(AlgorithmName + " not initialised");

          if (LimitExceeded())
          {
              throw new MaxBytesExceededException("2^70 byte limit per IV; Change IV");
          }

          // index == 0 means the current 64-byte block is used up (or none was generated yet).
          if (index == 0)
          {
              GenerateKeyStream(keyStream);
              AdvanceCounter();
          }

          byte output = (byte)(keyStream[index] ^ input);
          index = (index + 1) & 63;

          return output;
      }

      /// <summary>
      /// Increments the 64-bit block counter held in state words 8 (low) and 9 (high).
      /// </summary>
      protected virtual void AdvanceCounter()
      {
          if (++engineState[8] == 0)
          {
              ++engineState[9];
          }
      }

      /// <summary>
      /// Encrypts/decrypts <paramref name="len"/> bytes from <paramref name="inBytes"/> into
      /// <paramref name="outBytes"/>.
      /// </summary>
      /// <param name="inBytes">Input buffer.</param>
      /// <param name="inOff">Offset of the first input byte.</param>
      /// <param name="len">Number of bytes to process; expected to be non-negative.</param>
      /// <param name="outBytes">Output buffer.</param>
      /// <param name="outOff">Offset at which the first output byte is written.</param>
      /// <exception cref="InvalidOperationException">If the engine has not been initialised.</exception>
      /// <exception cref="MaxBytesExceededException">If the per-IV byte limit would be exceeded.</exception>
      public virtual void ProcessBytes(
          byte[] inBytes,
          int inOff,
          int len,
          byte[] outBytes,
          int outOff)
      {
          if (!initialised)
              throw new InvalidOperationException(AlgorithmName + " not initialised");

          // Buffer bounds are validated by the Check helpers (defined elsewhere in the project).
          Check.DataLength(inBytes, inOff, len, "input buffer too short");
          Check.OutputLength(outBytes, outOff, len, "output buffer too short");

          if (LimitExceeded((uint)len))
              throw new MaxBytesExceededException("2^70 byte limit per IV would be exceeded; Change IV");

          for (int i = 0; i < len; i++)
          {
              if (index == 0)
              {
                  GenerateKeyStream(keyStream);
                  AdvanceCounter();
              }
              outBytes[i+outOff] = (byte)(keyStream[index]^inBytes[i+inOff]);
              index = (index + 1) & 63;
          }
      }

#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER || UNITY_2021_2_OR_NEWER
      /// <summary>
      /// Span-based variant: encrypts/decrypts all of <paramref name="input"/> into the start of
      /// <paramref name="output"/>.
      /// </summary>
      /// <param name="input">Bytes to process.</param>
      /// <param name="output">Destination; must hold at least <c>input.Length</c> bytes.</param>
      /// <exception cref="InvalidOperationException">If the engine has not been initialised.</exception>
      /// <exception cref="MaxBytesExceededException">If the per-IV byte limit would be exceeded.</exception>
      public virtual void ProcessBytes(ReadOnlySpan<byte> input, Span<byte> output)
      {
          if (!initialised)
              throw new InvalidOperationException(AlgorithmName + " not initialised");

          Check.OutputLength(output, input.Length, "output buffer too short");

          if (LimitExceeded((uint)input.Length))
              throw new MaxBytesExceededException("2^70 byte limit per IV would be exceeded; Change IV");

          for (int i = 0; i < input.Length; i++)
          {
              if (index == 0)
              {
                  GenerateKeyStream(keyStream);
                  AdvanceCounter();
              }
              output[i] = (byte)(keyStream[index++] ^ input[i]);
              index &= 63;
          }
      }
#endif

      /// <summary>
      /// Rewinds the engine to the start of the keystream for the current key and IV.
      /// </summary>
      public virtual void Reset()
      {
          index = 0;
          ResetLimitCounter();
          ResetCounter();
      }

      /// <summary>Zeroes the block counter (state words 8 and 9).</summary>
      protected virtual void ResetCounter()
      {
          engineState[8] = engineState[9] = 0;
      }

      /// <summary>
      /// Loads key and IV into the engine state.
      /// </summary>
      /// <param name="keyBytes">
      /// A 16 or 32 byte key, or <c>null</c> to leave the constants and key words untouched.
      /// </param>
      /// <param name="ivBytes">IV bytes; the first 8 are loaded into state words 6 and 7.</param>
      /// <exception cref="ArgumentException">If a non-null key is neither 16 nor 32 bytes long.</exception>
      protected virtual void SetKey(byte[] keyBytes, byte[] ivBytes)
      {
          if (keyBytes != null)
          {
              if ((keyBytes.Length != 16) && (keyBytes.Length != 32))
                  throw new ArgumentException(AlgorithmName + " requires 128 bit or 256 bit key");

              // Constants go on the diagonal of the 4x4 state matrix.
              int tsOff = (keyBytes.Length - 16) / 4;
              engineState[0] = TAU_SIGMA[tsOff];
              engineState[5] = TAU_SIGMA[tsOff + 1];
              engineState[10] = TAU_SIGMA[tsOff + 2];
              engineState[15] = TAU_SIGMA[tsOff + 3];

              // Key
              // For a 16-byte key the offset (Length - 16) is 0, so the same 16 bytes fill both halves.
              Pack.LE_To_UInt32(keyBytes, 0, engineState, 1, 4);
              Pack.LE_To_UInt32(keyBytes, keyBytes.Length - 16, engineState, 11, 4);
          }

          // IV
          Pack.LE_To_UInt32(ivBytes, 0, engineState, 6, 2);
      }

      /// <summary>
      /// Produces the next keystream block from the current state into <paramref name="output"/>.
      /// Does not advance the block counter; callers do that via <see cref="AdvanceCounter"/>.
      /// </summary>
      /// <param name="output">Destination for the 64 keystream bytes.</param>
      protected virtual void GenerateKeyStream(byte[] output)
      {
          SalsaCore(rounds, engineState, x);
          Pack.UInt32_To_LE(x, output, 0);
      }

#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER || UNITY_2021_2_OR_NEWER
      /// <summary>
      /// The Salsa20 core: runs <paramref name="rounds"/> rounds over the first 16 words of
      /// <paramref name="input"/> and writes (result + input), word-wise, to <paramref name="output"/>.
      /// </summary>
      /// <param name="rounds">Number of rounds; must be even (two rounds are applied per iteration).</param>
      /// <param name="input">At least 16 state words.</param>
      /// <param name="output">At least 16 words of destination space.</param>
      /// <exception cref="ArgumentException">
      /// If either span is shorter than 16 words or <paramref name="rounds"/> is odd.
      /// </exception>
      internal static void SalsaCore(int rounds, ReadOnlySpan<uint> input, Span<uint> output)
      {
          if (input.Length < 16)
              throw new ArgumentException();
          if (output.Length < 16)
              throw new ArgumentException();
          if (rounds % 2 != 0)
              throw new ArgumentException("Number of rounds must be even");

#if NETCOREAPP3_0_OR_GREATER
          if (Sse41.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<short>>() == 16)
          {
              Vector128<uint> b0, b1, b2, b3;
              {
                  // Load the four state rows, then blend them (16-bit lane masks) so that each
                  // vector holds one diagonal of the 4x4 matrix:
                  //   b0 = words 0, 5, 10, 15    b1 = words 12, 1, 6, 11
                  //   b2 = words 8, 13, 2, 7     b3 = words 4, 9, 14, 3
                  var I = MemoryMarshal.AsBytes(input[..16]);
                  var t0 = MemoryMarshal.Read<Vector128<short>>(I[0x00..0x10]);
                  var t1 = MemoryMarshal.Read<Vector128<short>>(I[0x10..0x20]);
                  var t2 = MemoryMarshal.Read<Vector128<short>>(I[0x20..0x30]);
                  var t3 = MemoryMarshal.Read<Vector128<short>>(I[0x30..0x40]);

                  var u0 = Sse41.Blend(t0, t2, 0xF0);
                  var u1 = Sse41.Blend(t1, t3, 0xC3);
                  var u2 = Sse41.Blend(t0, t2, 0x0F);
                  var u3 = Sse41.Blend(t1, t3, 0x3C);

                  b0 = Sse41.Blend(u0, u1, 0xCC).AsUInt32();
                  b1 = Sse41.Blend(u0, u1, 0x33).AsUInt32();
                  b2 = Sse41.Blend(u2, u3, 0xCC).AsUInt32();
                  b3 = Sse41.Blend(u2, u3, 0x33).AsUInt32();
              }

              // Work on copies so the original words are still available for the final addition.
              var c0 = b0;
              var c1 = b1;
              var c2 = b2;
              var c3 = b3;

              // Each vectorised call performs four quarter-rounds at once; the two calls
              // (with c1 and c3 swapped) make up one double round.
              for (int i = rounds; i > 0; i -= 2)
              {
                  QuarterRound_Sse2(ref c0, ref c3, ref c2, ref c1);
                  QuarterRound_Sse2(ref c0, ref c1, ref c2, ref c3);
              }

              b0 = Sse2.Add(b0, c0);
              b1 = Sse2.Add(b1, c1);
              b2 = Sse2.Add(b2, c2);
              b3 = Sse2.Add(b3, c3);

              {
                  // Undo the diagonal layout with the inverse blend sequence and store the rows.
                  var t0 = b0.AsUInt16();
                  var t1 = b1.AsUInt16();
                  var t2 = b2.AsUInt16();
                  var t3 = b3.AsUInt16();

                  var u0 = Sse41.Blend(t0, t1, 0xCC);
                  var u1 = Sse41.Blend(t0, t1, 0x33);
                  var u2 = Sse41.Blend(t2, t3, 0xCC);
                  var u3 = Sse41.Blend(t2, t3, 0x33);

                  var v0 = Sse41.Blend(u0, u2, 0xF0);
                  var v1 = Sse41.Blend(u1, u3, 0xC3);
                  var v2 = Sse41.Blend(u0, u2, 0x0F);
                  var v3 = Sse41.Blend(u1, u3, 0x3C);

                  var X = MemoryMarshal.AsBytes(output[..16]);
                  MemoryMarshal.Write(X[0x00..0x10], ref v0);
                  MemoryMarshal.Write(X[0x10..0x20], ref v1);
                  MemoryMarshal.Write(X[0x20..0x30], ref v2);
                  MemoryMarshal.Write(X[0x30..0x40], ref v3);
              }
              return;
          }
#endif

          // Scalar fallback: keep the 16 working words in locals.
          uint x00 = input[ 0];
          uint x01 = input[ 1];
          uint x02 = input[ 2];
          uint x03 = input[ 3];
          uint x04 = input[ 4];
          uint x05 = input[ 5];
          uint x06 = input[ 6];
          uint x07 = input[ 7];
          uint x08 = input[ 8];
          uint x09 = input[ 9];
          uint x10 = input[10];
          uint x11 = input[11];
          uint x12 = input[12];
          uint x13 = input[13];
          uint x14 = input[14];
          uint x15 = input[15];

          for (int i = rounds; i > 0; i -= 2)
          {
              // Column round
              QuarterRound(ref x00, ref x04, ref x08, ref x12);
              QuarterRound(ref x05, ref x09, ref x13, ref x01);
              QuarterRound(ref x10, ref x14, ref x02, ref x06);
              QuarterRound(ref x15, ref x03, ref x07, ref x11);

              // Row round
              QuarterRound(ref x00, ref x01, ref x02, ref x03);
              QuarterRound(ref x05, ref x06, ref x07, ref x04);
              QuarterRound(ref x10, ref x11, ref x08, ref x09);
              QuarterRound(ref x15, ref x12, ref x13, ref x14);
          }

          // Feed-forward: add the original input words.
          output[ 0] = x00 + input[ 0];
          output[ 1] = x01 + input[ 1];
          output[ 2] = x02 + input[ 2];
          output[ 3] = x03 + input[ 3];
          output[ 4] = x04 + input[ 4];
          output[ 5] = x05 + input[ 5];
          output[ 6] = x06 + input[ 6];
          output[ 7] = x07 + input[ 7];
          output[ 8] = x08 + input[ 8];
          output[ 9] = x09 + input[ 9];
          output[10] = x10 + input[10];
          output[11] = x11 + input[11];
          output[12] = x12 + input[12];
          output[13] = x13 + input[13];
          output[14] = x14 + input[14];
          output[15] = x15 + input[15];
      }
#else
      /// <summary>
      /// The Salsa20 core: runs <paramref name="rounds"/> rounds over the first 16 words of
      /// <paramref name="input"/> and writes (result + input), word-wise, to <paramref name="output"/>.
      /// </summary>
      /// <param name="rounds">Number of rounds; must be even (two rounds are applied per iteration).</param>
      /// <param name="input">At least 16 state words.</param>
      /// <param name="output">At least 16 words of destination space.</param>
      /// <exception cref="ArgumentException">
      /// If either array is shorter than 16 words or <paramref name="rounds"/> is odd.
      /// </exception>
      internal static void SalsaCore(int rounds, uint[] input, uint[] output)
      {
          if (input.Length < 16)
              throw new ArgumentException();
          if (output.Length < 16)
              throw new ArgumentException();
          if (rounds % 2 != 0)
              throw new ArgumentException("Number of rounds must be even");

          // Keep the 16 working words in locals.
          uint x00 = input[ 0];
          uint x01 = input[ 1];
          uint x02 = input[ 2];
          uint x03 = input[ 3];
          uint x04 = input[ 4];
          uint x05 = input[ 5];
          uint x06 = input[ 6];
          uint x07 = input[ 7];
          uint x08 = input[ 8];
          uint x09 = input[ 9];
          uint x10 = input[10];
          uint x11 = input[11];
          uint x12 = input[12];
          uint x13 = input[13];
          uint x14 = input[14];
          uint x15 = input[15];

          for (int i = rounds; i > 0; i -= 2)
          {
              // Column round
              QuarterRound(ref x00, ref x04, ref x08, ref x12);
              QuarterRound(ref x05, ref x09, ref x13, ref x01);
              QuarterRound(ref x10, ref x14, ref x02, ref x06);
              QuarterRound(ref x15, ref x03, ref x07, ref x11);

              // Row round
              QuarterRound(ref x00, ref x01, ref x02, ref x03);
              QuarterRound(ref x05, ref x06, ref x07, ref x04);
              QuarterRound(ref x10, ref x11, ref x08, ref x09);
              QuarterRound(ref x15, ref x12, ref x13, ref x14);
          }

          // Feed-forward: add the original input words.
          output[ 0] = x00 + input[ 0];
          output[ 1] = x01 + input[ 1];
          output[ 2] = x02 + input[ 2];
          output[ 3] = x03 + input[ 3];
          output[ 4] = x04 + input[ 4];
          output[ 5] = x05 + input[ 5];
          output[ 6] = x06 + input[ 6];
          output[ 7] = x07 + input[ 7];
          output[ 8] = x08 + input[ 8];
          output[ 9] = x09 + input[ 9];
          output[10] = x10 + input[10];
          output[11] = x11 + input[11];
          output[12] = x12 + input[12];
          output[13] = x13 + input[13];
          output[14] = x14 + input[14];
          output[15] = x15 + input[15];
      }
#endif

      /// <summary>Zeroes the 96-bit processed-byte counter.</summary>
      internal void ResetLimitCounter()
      {
          cW0 = 0;
          cW1 = 0;
          cW2 = 0;
      }

      /// <summary>
      /// Adds one to the processed-byte counter and reports whether the per-IV limit was hit.
      /// </summary>
      /// <returns>
      /// <c>true</c> only on a call where the carry reaches <c>cW2</c> and the incremented
      /// <c>cW2</c> has bit 0x20 set; otherwise <c>false</c>.
      /// </returns>
      /// <remarks>
      /// NOTE(review): 0x20 is 2^5, so with cW2 counting units of 2^64 bytes this first trips at
      /// 2^69 bytes, while the inline comment and the exception messages say 2^70 (which would
      /// be 0x40). Left unchanged; confirm the intended limit.
      /// </remarks>
      internal bool LimitExceeded()
      {
          if (++cW0 == 0)
          {
              if (++cW1 == 0)
              {
                  return (++cW2 & 0x20) != 0;          // 2^(32 + 32 + 6)
              }
          }

          return false;
      }

      /*
       * this relies on the fact len will always be positive.
       */
      /// <summary>
      /// Adds <paramref name="len"/> to the processed-byte counter and reports whether the
      /// per-IV limit was hit.
      /// </summary>
      /// <param name="len">Number of bytes about to be processed.</param>
      /// <returns>
      /// <c>true</c> only on a call where the carry reaches <c>cW2</c> and the incremented
      /// <c>cW2</c> has bit 0x20 set; otherwise <c>false</c>.
      /// </returns>
      internal bool LimitExceeded(
          uint len)
      {
          uint old = cW0;
          cW0 += len;

          // Unsigned wrap-around (new value smaller than the old one) signals a carry out of cW0.
          if (cW0 < old)
          {
              if (++cW1 == 0)
              {
                  return (++cW2 & 0x20) != 0;          // 2^(32 + 32 + 6)
              }
          }

          return false;
      }

      /// <summary>
      /// Scalar Salsa20 quarter-round: updates b, c, d and a in turn, each from the sum of the
      /// two previously touched words rotated left by 7, 9, 13 and 18 bits.
      /// </summary>
#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER || UNITY_2021_2_OR_NEWER
      [MethodImpl(MethodImplOptions.AggressiveInlining)]
#endif
      private static void QuarterRound(ref uint a, ref uint b, ref uint c, ref uint d)
      {
          b ^= Integers.RotateLeft(a + d, 7);
          c ^= Integers.RotateLeft(b + a, 9);
          d ^= Integers.RotateLeft(c + b, 13);
          a ^= Integers.RotateLeft(d + c, 18);
      }

#if NETCOREAPP3_0_OR_GREATER
      /// <summary>
      /// Vectorised quarter-round: the same add/rotate/xor sequence as <see cref="QuarterRound"/>
      /// applied to all four 32-bit lanes at once, followed by lane shuffles of b, c and d that
      /// realign the words for the next call.
      /// </summary>
      [MethodImpl(MethodImplOptions.AggressiveInlining)]
      private static void QuarterRound_Sse2(ref Vector128<uint> a, ref Vector128<uint> b, ref Vector128<uint> c,
          ref Vector128<uint> d)
      {
          b = Sse2.Xor(b, Rotate_Sse2(Sse2.Add(a, d), 7));
          c = Sse2.Xor(c, Rotate_Sse2(Sse2.Add(b, a), 9));
          d = Sse2.Xor(d, Rotate_Sse2(Sse2.Add(c, b), 13));
          a = Sse2.Xor(a, Rotate_Sse2(Sse2.Add(d, c), 18));

          b = Sse2.Shuffle(b, 0x93);
          c = Sse2.Shuffle(c, 0x4E);
          d = Sse2.Shuffle(d, 0x39);
      }

      /// <summary>
      /// Rotates every 32-bit lane of <paramref name="x"/> left by <paramref name="sl"/> bits,
      /// built from a left shift and the complementary right shift.
      /// </summary>
      [MethodImpl(MethodImplOptions.AggressiveInlining)]
      private static Vector128<uint> Rotate_Sse2(Vector128<uint> x, byte sl)
      {
          byte sr = (byte)(32 - sl);
          // The two shifted halves never overlap, so Xor combines them like Or would.
          return Sse2.Xor(Sse2.ShiftLeftLogical(x, sl), Sse2.ShiftRightLogical(x, sr));
      }
#endif
  }
  443. }
  444. #pragma warning restore
  445. #endif