#if !BESTHTTP_DISABLE_ALTERNATE_SSL && (!UNITY_WEBGL || UNITY_EDITOR)
#pragma warning disable
using System;
#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER || UNITY_2021_2_OR_NEWER
using System.Runtime.CompilerServices;
#endif
#if NETCOREAPP3_0_OR_GREATER
using System.Runtime.InteropServices;
using System.Runtime.Intrinsics;
using System.Runtime.Intrinsics.X86;
#endif
using Best.HTTP.SecureProtocol.Org.BouncyCastle.Crypto.Parameters;
using Best.HTTP.SecureProtocol.Org.BouncyCastle.Crypto.Utilities;
using Best.HTTP.SecureProtocol.Org.BouncyCastle.Utilities;
namespace Best.HTTP.SecureProtocol.Org.BouncyCastle.Crypto.Engines
{
/// <summary>
/// Implementation of Daniel J. Bernstein's Salsa20 stream cipher, Snuffle 2005.
/// </summary>
public class Salsa20Engine
: IStreamCipher
{
/// <summary>Round count used by the parameterless constructor.</summary>
public static readonly int DEFAULT_ROUNDS = 20;
/** Constants */
// Number of 32-bit words in the cipher state.
private const int StateSize = 16; // 16, 32 bit ints = 64 bytes
// Both ASCII constants packed back to back by Pack.LE_To_UInt32 into eight words:
// entries 0-3 come from "expand 16-byte k", entries 4-7 from "expand 32-byte k".
private readonly static uint[] TAU_SIGMA = Pack.LE_To_UInt32(Strings.ToAsciiByteArray("expand 16-byte k" + "expand 32-byte k"), 0, 8);
/// <summary>
/// Copies the four constant words that match the key length into
/// <paramref name="state"/>, at four consecutive positions starting at
/// <paramref name="stateOffset"/>.
/// </summary>
/// <param name="keyLength">Key length in bytes; 16 selects TAU_SIGMA[0..3], 32 selects TAU_SIGMA[4..7].</param>
/// <param name="state">Destination word array.</param>
/// <param name="stateOffset">Index in <paramref name="state"/> of the first word written.</param>
internal void PackTauOrSigma(int keyLength, uint[] state, int stateOffset)
{
    // (keyLength - 16) / 4 evaluates to 0 for a 16 byte key and 4 for a 32 byte key.
    int constantsStart = (keyLength - 16) / 4;
    for (int word = 0; word < 4; ++word)
    {
        state[stateOffset + word] = TAU_SIGMA[constantsStart + word];
    }
}
// Number of rounds handed to SalsaCore; assigned by the constructor.
protected int rounds;
/*
* variables to hold the state of the engine
* during encryption and decryption
*/
// Position of the next unused byte in keyStream; a value of 0 makes the
// processing methods generate a fresh 64 byte block first.
internal int index = 0;
internal uint[] engineState = new uint[StateSize]; // state
internal uint[] x = new uint[StateSize]; // internal buffer
internal byte[] keyStream = new byte[StateSize * 4]; // expanded state, 64 bytes
// Set to true at the end of a successful Init call.
internal bool initialised = false;
/*
* internal counter
*/
// Three 32-bit limbs (cW0 is the least significant) counting the bytes
// processed since the last ResetLimitCounter call.
private uint cW0, cW1, cW2;
/// <summary>
/// Creates a 20 round Salsa20 engine.
/// </summary>
public Salsa20Engine()
: this(DEFAULT_ROUNDS)
{
}
/// <summary>
/// Creates a Salsa20 engine with a specific number of rounds.
/// </summary>
/// <param name="rounds">the number of rounds (must be a positive, even number).</param>
/// <exception cref="ArgumentException">
/// <paramref name="rounds"/> is zero, negative or odd.
/// </exception>
public Salsa20Engine(int rounds)
{
    bool isPositive = rounds > 0;
    bool isEven = (rounds & 1) == 0;
    if (!isPositive || !isEven)
        throw new ArgumentException("'rounds' must be a positive, even number");

    this.rounds = rounds;
}
/// <summary>
/// Initialises the engine from a <c>ParametersWithIV</c>. The wrapped parameters must
/// be a <c>KeyParameter</c>, or null to keep the current key and only change the IV
/// (allowed only after a previous successful initialisation).
/// </summary>
/// <param name="forEncryption">Ignored: encryption and decryption are the same operation.</param>
/// <param name="parameters">A <c>ParametersWithIV</c> carrying exactly <c>NonceSize</c> bytes of IV.</param>
/// <exception cref="ArgumentException">
/// The parameters are not a <c>ParametersWithIV</c>, the IV is null or has the wrong
/// length, or the wrapped parameters are neither null nor a <c>KeyParameter</c>.
/// </exception>
/// <exception cref="InvalidOperationException">
/// No key was supplied and the engine has not been initialised before.
/// </exception>
public virtual void Init(
    bool forEncryption,
    ICipherParameters parameters)
{
    // The keystream is simply XORed with the data, so the direction flag has no effect.
    ParametersWithIV withIV = parameters as ParametersWithIV;
    if (withIV == null)
        throw new ArgumentException(AlgorithmName + " Init requires an IV", "parameters");

    byte[] nonce = withIV.GetIV();
    if (nonce == null || nonce.Length != NonceSize)
        throw new ArgumentException(AlgorithmName + " requires exactly " + NonceSize + " bytes of IV");

    ICipherParameters wrapped = withIV.Parameters;
    KeyParameter key = wrapped as KeyParameter;

    if (key != null)
    {
        // Fresh key and IV.
        SetKey(key.GetKey(), nonce);
    }
    else if (wrapped != null)
    {
        throw new ArgumentException(AlgorithmName + " Init parameters must contain a KeyParameter (or null for re-init)");
    }
    else if (initialised)
    {
        // Re-initialisation: keep the key already loaded, replace only the IV.
        SetKey(null, nonce);
    }
    else
    {
        throw new InvalidOperationException(AlgorithmName + " KeyParameter can not be null for first initialisation");
    }

    Reset();
    initialised = true;
}
/// <summary>Required IV length in bytes; Init rejects an IV of any other length.</summary>
protected virtual int NonceSize => 8;
/// <summary>
/// "Salsa20" when the engine uses the default round count, otherwise
/// "Salsa20/" followed by the configured number of rounds.
/// </summary>
public virtual string AlgorithmName
{
    get
    {
        string baseName = "Salsa20";
        return rounds == DEFAULT_ROUNDS
            ? baseName
            : baseName + "/" + rounds;
    }
}
/// <summary>
/// Processes a single byte by XORing it with the next keystream byte.
/// </summary>
/// <param name="input">The byte to encrypt or decrypt.</param>
/// <returns><paramref name="input"/> XORed with the next keystream byte.</returns>
/// <exception cref="InvalidOperationException">The engine has not been initialised.</exception>
/// <exception cref="MaxBytesExceededException">The per-IV byte limit has been reached.</exception>
public virtual byte ReturnByte(
    byte input)
{
    // Same guard as the ProcessBytes overloads: without it an engine that was never
    // keyed would silently XOR the data with a keystream built from all-zero state.
    if (!initialised)
        throw new InvalidOperationException(AlgorithmName + " not initialised");

    if (LimitExceeded())
    {
        throw new MaxBytesExceededException("2^70 byte limit per IV; Change IV");
    }

    // index == 0 means the current 64 byte block is used up (or none exists yet).
    if (index == 0)
    {
        GenerateKeyStream(keyStream);
        AdvanceCounter();
    }

    byte output = (byte)(keyStream[index] ^ input);
    index = (index + 1) & 63; // wrap at the 64 byte block boundary
    return output;
}
/// <summary>
/// Increments the block counter held in engineState[8] (low word) and
/// engineState[9] (high word), carrying into the high word on wrap-around.
/// </summary>
protected virtual void AdvanceCounter()
{
    engineState[8]++;

    bool lowWordWrapped = engineState[8] == 0;
    if (lowWordWrapped)
    {
        engineState[9]++;
    }
}
/// <summary>
/// XORs <paramref name="len"/> bytes of <paramref name="inBytes"/> with the keystream
/// and writes the result to <paramref name="outBytes"/>.
/// </summary>
/// <param name="inBytes">Source buffer.</param>
/// <param name="inOff">Offset of the first source byte.</param>
/// <param name="len">Number of bytes to process.</param>
/// <param name="outBytes">Destination buffer.</param>
/// <param name="outOff">Offset of the first destination byte.</param>
/// <exception cref="InvalidOperationException">The engine has not been initialised.</exception>
/// <exception cref="MaxBytesExceededException">The per-IV byte limit would be exceeded.</exception>
public virtual void ProcessBytes(
    byte[] inBytes,
    int inOff,
    int len,
    byte[] outBytes,
    int outOff)
{
    if (!initialised)
        throw new InvalidOperationException(AlgorithmName + " not initialised");

    Check.DataLength(inBytes, inOff, len, "input buffer too short");
    Check.OutputLength(outBytes, outOff, len, "output buffer too short");

    if (LimitExceeded((uint)len))
        throw new MaxBytesExceededException("2^70 byte limit per IV would be exceeded; Change IV");

    int readPos = inOff;
    int writePos = outOff;
    for (int remaining = len; remaining > 0; --remaining)
    {
        // A zero index means the current keystream block is exhausted.
        if (index == 0)
        {
            GenerateKeyStream(keyStream);
            AdvanceCounter();
        }

        outBytes[writePos] = (byte)(keyStream[index] ^ inBytes[readPos]);
        ++writePos;
        ++readPos;
        index = (index + 1) & 63;
    }
}
#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER || UNITY_2021_2_OR_NEWER
/// <summary>
/// XORs every byte of <paramref name="input"/> with the keystream and writes the
/// result to the start of <paramref name="output"/>.
/// </summary>
/// <param name="input">Bytes to encrypt or decrypt.</param>
/// <param name="output">Destination; checked by Check.OutputLength against <c>input.Length</c>.</param>
/// <exception cref="InvalidOperationException">The engine has not been initialised.</exception>
/// <exception cref="MaxBytesExceededException">The per-IV byte limit would be exceeded.</exception>
public virtual void ProcessBytes(ReadOnlySpan<byte> input, Span<byte> output)
{
    if (!initialised)
        throw new InvalidOperationException(AlgorithmName + " not initialised");

    Check.OutputLength(output, input.Length, "output buffer too short");

    if (LimitExceeded((uint)input.Length))
        throw new MaxBytesExceededException("2^70 byte limit per IV would be exceeded; Change IV");

    for (int i = 0; i < input.Length; i++)
    {
        // A zero index means the current keystream block is exhausted.
        if (index == 0)
        {
            GenerateKeyStream(keyStream);
            AdvanceCounter();
        }

        output[i] = (byte)(keyStream[index++] ^ input[i]);
        index &= 63; // wrap at the 64 byte block boundary
    }
}
#endif
/// <summary>
/// Rewinds the engine to the start of its keystream: clears the keystream position,
/// the processed-byte limit counter and the block counter. The key and IV words in
/// the state are not touched here.
/// </summary>
public virtual void Reset()
{
// index == 0 makes the next processing call generate a fresh keystream block.
index = 0;
ResetLimitCounter();
ResetCounter();
}
/// <summary>
/// Zeroes the two block counter words, engineState[8] and engineState[9].
/// </summary>
protected virtual void ResetCounter()
{
    engineState[9] = 0;
    engineState[8] = 0;
}
/// <summary>
/// Loads key material and IV into the engine state. The constant words go to
/// positions 0, 5, 10 and 15, the key to words 1-4 and 11-14, and the IV to
/// words 6-7.
/// </summary>
/// <param name="keyBytes">
/// A 16 or 32 byte key, or null to leave the constants and key words as they are.
/// </param>
/// <param name="ivBytes">The IV; its first 8 bytes are read.</param>
/// <exception cref="ArgumentException">The key is neither 16 nor 32 bytes long.</exception>
protected virtual void SetKey(byte[] keyBytes, byte[] ivBytes)
{
    if (keyBytes != null)
    {
        int keyLength = keyBytes.Length;
        if (keyLength != 16 && keyLength != 32)
            throw new ArgumentException(AlgorithmName + " requires 128 bit or 256 bit key");

        // 0 selects the "expand 16-byte k" words, 4 the "expand 32-byte k" words.
        int constantsStart = (keyLength - 16) / 4;
        for (int n = 0; n < 4; ++n)
        {
            // The constants occupy state words 0, 5, 10 and 15.
            engineState[n * 5] = TAU_SIGMA[constantsStart + n];
        }

        // Key: the first 16 bytes fill words 1-4 and the last 16 bytes fill words 11-14.
        // For a 16 byte key both calls read the same bytes.
        Pack.LE_To_UInt32(keyBytes, 0, engineState, 1, 4);
        Pack.LE_To_UInt32(keyBytes, keyLength - 16, engineState, 11, 4);
    }

    // IV
    Pack.LE_To_UInt32(ivBytes, 0, engineState, 6, 2);
}
/// <summary>
/// Produces one keystream block from the current engine state and writes it to
/// <paramref name="output"/> starting at offset 0.
/// </summary>
/// <param name="output">Receives the block; callers in this class pass the 64 byte keyStream buffer.</param>
protected virtual void GenerateKeyStream(byte[] output)
{
// The core writes its 16 result words into the scratch buffer x; engineState is only read.
SalsaCore(rounds, engineState, x);
Pack.UInt32_To_LE(x, output, 0);
}
#if NETCOREAPP2_1_OR_GREATER || NETSTANDARD2_1_OR_GREATER || UNITY_2021_2_OR_NEWER
/// <summary>
/// Salsa20 core: runs <paramref name="rounds"/> rounds over the first 16 words of
/// <paramref name="input"/> and stores the word-wise sum of the mixed state and the
/// original input in the first 16 words of <paramref name="output"/>.
/// </summary>
/// <param name="rounds">Number of rounds; must be even (two rounds are done per loop pass).</param>
/// <param name="input">At least 16 state words; only read.</param>
/// <param name="output">At least 16 words; receives the result.</param>
/// <exception cref="ArgumentException">
/// A span holds fewer than 16 words, or <paramref name="rounds"/> is odd.
/// </exception>
internal static void SalsaCore(int rounds, ReadOnlySpan<uint> input, Span<uint> output)
{
    if (input.Length < 16)
        throw new ArgumentException();
    if (output.Length < 16)
        throw new ArgumentException();
    if (rounds % 2 != 0)
        throw new ArgumentException("Number of rounds must be even");

#if NETCOREAPP3_0_OR_GREATER
    if (Sse41.IsSupported && BitConverter.IsLittleEndian && Unsafe.SizeOf<Vector128<short>>() == 16)
    {
        Vector128<uint> b0, b1, b2, b3;

        {
            // Load the 64 byte state as four 128-bit rows, then regroup the words with
            // 16-bit-lane blends (two lanes per 32-bit word) into the layout that
            // QuarterRound_Sse2 works on.
            var I = MemoryMarshal.AsBytes(input[..16]);
            var t0 = MemoryMarshal.Read<Vector128<short>>(I[0x00..0x10]);
            var t1 = MemoryMarshal.Read<Vector128<short>>(I[0x10..0x20]);
            var t2 = MemoryMarshal.Read<Vector128<short>>(I[0x20..0x30]);
            var t3 = MemoryMarshal.Read<Vector128<short>>(I[0x30..0x40]);

            var u0 = Sse41.Blend(t0, t2, 0xF0);
            var u1 = Sse41.Blend(t1, t3, 0xC3);
            var u2 = Sse41.Blend(t0, t2, 0x0F);
            var u3 = Sse41.Blend(t1, t3, 0x3C);

            b0 = Sse41.Blend(u0, u1, 0xCC).AsUInt32();
            b1 = Sse41.Blend(u0, u1, 0x33).AsUInt32();
            b2 = Sse41.Blend(u2, u3, 0xCC).AsUInt32();
            b3 = Sse41.Blend(u2, u3, 0x33).AsUInt32();
        }

        // Working copies; b0..b3 keep the regrouped input for the final addition.
        var c0 = b0;
        var c1 = b1;
        var c2 = b2;
        var c3 = b3;

        for (int i = rounds; i > 0; i -= 2)
        {
            // Two rounds per pass; the second call takes the vectors in a different order.
            QuarterRound_Sse2(ref c0, ref c3, ref c2, ref c1);
            QuarterRound_Sse2(ref c0, ref c1, ref c2, ref c3);
        }

        // Feed-forward: add the original state to the mixed state.
        b0 = Sse2.Add(b0, c0);
        b1 = Sse2.Add(b1, c1);
        b2 = Sse2.Add(b2, c2);
        b3 = Sse2.Add(b3, c3);

        {
            // Apply the input blend steps in reverse order before storing the rows.
            var t0 = b0.AsUInt16();
            var t1 = b1.AsUInt16();
            var t2 = b2.AsUInt16();
            var t3 = b3.AsUInt16();

            var u0 = Sse41.Blend(t0, t1, 0xCC);
            var u1 = Sse41.Blend(t0, t1, 0x33);
            var u2 = Sse41.Blend(t2, t3, 0xCC);
            var u3 = Sse41.Blend(t2, t3, 0x33);

            var v0 = Sse41.Blend(u0, u2, 0xF0);
            var v1 = Sse41.Blend(u1, u3, 0xC3);
            var v2 = Sse41.Blend(u0, u2, 0x0F);
            var v3 = Sse41.Blend(u1, u3, 0x3C);

            var X = MemoryMarshal.AsBytes(output[..16]);
            MemoryMarshal.Write(X[0x00..0x10], ref v0);
            MemoryMarshal.Write(X[0x10..0x20], ref v1);
            MemoryMarshal.Write(X[0x20..0x30], ref v2);
            MemoryMarshal.Write(X[0x30..0x40], ref v3);
        }
        return;
    }
#endif

    // Scalar path: keep the 16 state words in locals while mixing.
    uint x00 = input[ 0];
    uint x01 = input[ 1];
    uint x02 = input[ 2];
    uint x03 = input[ 3];
    uint x04 = input[ 4];
    uint x05 = input[ 5];
    uint x06 = input[ 6];
    uint x07 = input[ 7];
    uint x08 = input[ 8];
    uint x09 = input[ 9];
    uint x10 = input[10];
    uint x11 = input[11];
    uint x12 = input[12];
    uint x13 = input[13];
    uint x14 = input[14];
    uint x15 = input[15];

    for (int i = rounds; i > 0; i -= 2)
    {
        // First round of the pair.
        QuarterRound(ref x00, ref x04, ref x08, ref x12);
        QuarterRound(ref x05, ref x09, ref x13, ref x01);
        QuarterRound(ref x10, ref x14, ref x02, ref x06);
        QuarterRound(ref x15, ref x03, ref x07, ref x11);

        // Second round of the pair.
        QuarterRound(ref x00, ref x01, ref x02, ref x03);
        QuarterRound(ref x05, ref x06, ref x07, ref x04);
        QuarterRound(ref x10, ref x11, ref x08, ref x09);
        QuarterRound(ref x15, ref x12, ref x13, ref x14);
    }

    // Feed-forward: add the original state to the mixed state.
    output[ 0] = x00 + input[ 0];
    output[ 1] = x01 + input[ 1];
    output[ 2] = x02 + input[ 2];
    output[ 3] = x03 + input[ 3];
    output[ 4] = x04 + input[ 4];
    output[ 5] = x05 + input[ 5];
    output[ 6] = x06 + input[ 6];
    output[ 7] = x07 + input[ 7];
    output[ 8] = x08 + input[ 8];
    output[ 9] = x09 + input[ 9];
    output[10] = x10 + input[10];
    output[11] = x11 + input[11];
    output[12] = x12 + input[12];
    output[13] = x13 + input[13];
    output[14] = x14 + input[14];
    output[15] = x15 + input[15];
}
#else
/// <summary>
/// Salsa20 core (array variant): runs <paramref name="rounds"/> rounds over the first
/// 16 words of <paramref name="input"/> and stores the word-wise sum of the mixed
/// state and the original input in the first 16 words of <paramref name="output"/>.
/// </summary>
/// <param name="rounds">Number of rounds; must be even (two rounds are done per loop pass).</param>
/// <param name="input">At least 16 state words; only read.</param>
/// <param name="output">At least 16 words; receives the result.</param>
/// <exception cref="ArgumentException">
/// An array holds fewer than 16 words, or <paramref name="rounds"/> is odd.
/// </exception>
internal static void SalsaCore(int rounds, uint[] input, uint[] output)
{
if (input.Length < 16)
throw new ArgumentException();
if (output.Length < 16)
throw new ArgumentException();
if (rounds % 2 != 0)
throw new ArgumentException("Number of rounds must be even");
// Keep the 16 state words in locals while mixing.
uint x00 = input[ 0];
uint x01 = input[ 1];
uint x02 = input[ 2];
uint x03 = input[ 3];
uint x04 = input[ 4];
uint x05 = input[ 5];
uint x06 = input[ 6];
uint x07 = input[ 7];
uint x08 = input[ 8];
uint x09 = input[ 9];
uint x10 = input[10];
uint x11 = input[11];
uint x12 = input[12];
uint x13 = input[13];
uint x14 = input[14];
uint x15 = input[15];
for (int i = rounds; i > 0; i -= 2)
{
// First round of the pair.
QuarterRound(ref x00, ref x04, ref x08, ref x12);
QuarterRound(ref x05, ref x09, ref x13, ref x01);
QuarterRound(ref x10, ref x14, ref x02, ref x06);
QuarterRound(ref x15, ref x03, ref x07, ref x11);
// Second round of the pair.
QuarterRound(ref x00, ref x01, ref x02, ref x03);
QuarterRound(ref x05, ref x06, ref x07, ref x04);
QuarterRound(ref x10, ref x11, ref x08, ref x09);
QuarterRound(ref x15, ref x12, ref x13, ref x14);
}
// Feed-forward: add the original state to the mixed state. Each input[i] is read
// before output[i] is written.
output[ 0] = x00 + input[ 0];
output[ 1] = x01 + input[ 1];
output[ 2] = x02 + input[ 2];
output[ 3] = x03 + input[ 3];
output[ 4] = x04 + input[ 4];
output[ 5] = x05 + input[ 5];
output[ 6] = x06 + input[ 6];
output[ 7] = x07 + input[ 7];
output[ 8] = x08 + input[ 8];
output[ 9] = x09 + input[ 9];
output[10] = x10 + input[10];
output[11] = x11 + input[11];
output[12] = x12 + input[12];
output[13] = x13 + input[13];
output[14] = x14 + input[14];
output[15] = x15 + input[15];
}
#endif
/// <summary>
/// Clears all three limbs of the processed-byte counter.
/// </summary>
internal void ResetLimitCounter()
{
    cW0 = cW1 = cW2 = 0;
}
/// <summary>
/// Adds one byte to the processed-byte counter and reports whether the limit bit
/// became set as a result.
/// </summary>
/// <returns>
/// true only when both lower limbs wrapped on this call and bit 0x20 of the
/// incremented top limb is set; false otherwise.
/// </returns>
internal bool LimitExceeded()
{
    // No carry out of the lowest limb: nothing else changes.
    if (++cW0 != 0)
        return false;

    // The carry reached the middle limb but did not propagate further.
    if (++cW1 != 0)
        return false;

    return (++cW2 & 0x20) != 0; // 2^(32 + 32 + 6)
}
/// <summary>
/// Adds <paramref name="len"/> bytes to the processed-byte counter and reports
/// whether the limit bit became set as a result.
/// </summary>
/// <param name="len">Number of bytes about to be processed.</param>
/// <returns>
/// true only when the addition carried out of both lower limbs on this call and
/// bit 0x20 of the incremented top limb is set; false otherwise.
/// </returns>
/*
* this relies on the fact len will always be positive.
*/
internal bool LimitExceeded(
    uint len)
{
    uint before = cW0;
    cW0 += len;

    // The unsigned addition wrapped exactly when the new value is below the old one.
    bool carried = cW0 < before;
    if (!carried)
        return false;

    if (++cW1 != 0)
        return false;

    return (++cW2 & 0x20) != 0; // 2^(32 + 32 + 6)
}
/// <summary>
/// One Salsa20 quarter-round: updates b, c, d and a in that order, each by XORing in
/// a left-rotated sum of two other words (rotation counts 7, 9, 13 and 18).
/// </summary>
#if NETSTANDARD1_0_OR_GREATER || NETCOREAPP1_0_OR_GREATER || UNITY_2021_2_OR_NEWER
[MethodImpl(MethodImplOptions.AggressiveInlining)]
#endif
private static void QuarterRound(ref uint a, ref uint b, ref uint c, ref uint d)
{
    // Each step uses the word updated by the previous step, so the order is fixed.
    uint sum = a + d;
    b ^= Integers.RotateLeft(sum, 7);

    sum = b + a;
    c ^= Integers.RotateLeft(sum, 9);

    sum = c + b;
    d ^= Integers.RotateLeft(sum, 13);

    sum = d + c;
    a ^= Integers.RotateLeft(sum, 18);
}
#if NETCOREAPP3_0_OR_GREATER
// Vectorised quarter-round: applies the same add / rotate / xor steps as the scalar
// QuarterRound (rotation counts 7, 9, 13, 18) to all four 32-bit lanes at once, then
// permutes the lanes of b, c and d for the following call.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static void QuarterRound_Sse2(ref Vector128<uint> a, ref Vector128<uint> b, ref Vector128<uint> c,
    ref Vector128<uint> d)
{
    // Each step uses the vector updated by the previous step, so the order is fixed.
    b = Sse2.Xor(b, Rotate_Sse2(Sse2.Add(a, d), 7));
    c = Sse2.Xor(c, Rotate_Sse2(Sse2.Add(b, a), 9));
    d = Sse2.Xor(d, Rotate_Sse2(Sse2.Add(c, b), 13));
    a = Sse2.Xor(a, Rotate_Sse2(Sse2.Add(d, c), 18));

    // Lane permutations: 0x93 and 0x39 rotate the four lanes by one position in
    // opposite directions, 0x4E swaps the two 64-bit halves.
    b = Sse2.Shuffle(b, 0x93);
    c = Sse2.Shuffle(c, 0x4E);
    d = Sse2.Shuffle(d, 0x39);
}
// Rotates every 32-bit lane of x left by sl bits, built from a left shift by sl and a
// right shift by (32 - sl). The two shifted values share no set bits, so XOR combines
// them exactly as OR would.
[MethodImpl(MethodImplOptions.AggressiveInlining)]
private static Vector128<uint> Rotate_Sse2(Vector128<uint> x, byte sl)
{
    byte sr = (byte)(32 - sl);
    return Sse2.Xor(Sse2.ShiftLeftLogical(x, sl), Sse2.ShiftRightLogical(x, sr));
}
#endif
}
}
#pragma warning restore
#endif