Fixed Point Math in C#

Fixed point math in C#

Ok, here's what I've come up with for a fixed-point struct, based on the link in my original question but also including some fixes to how it was handling division and multiplication, and added logic for modules, comparisons, shifts, etc:

public struct FInt
{
public long RawValue;
public const int SHIFT_AMOUNT = 12; //12 is 4096

public const long One = 1 << SHIFT_AMOUNT;
public const int OneI = 1 << SHIFT_AMOUNT;
public static FInt OneF = FInt.Create( 1, true );

#region Constructors
public static FInt Create( long StartingRawValue, bool UseMultiple )
{
FInt fInt;
fInt.RawValue = StartingRawValue;
if ( UseMultiple )
fInt.RawValue = fInt.RawValue << SHIFT_AMOUNT;
return fInt;
}
public static FInt Create( double DoubleValue )
{
FInt fInt;
DoubleValue *= (double)One;
fInt.RawValue = (int)Math.Round( DoubleValue );
return fInt;
}
#endregion

public int IntValue
{
get { return (int)( this.RawValue >> SHIFT_AMOUNT ); }
}

public int ToInt()
{
return (int)( this.RawValue >> SHIFT_AMOUNT );
}

public double ToDouble()
{
return (double)this.RawValue / (double)One;
}

public FInt Inverse
{
get { return FInt.Create( -this.RawValue, false ); }
}

#region FromParts
/// <summary>
/// Create a fixed-int number from parts. For example, to create 1.5 pass in 1 and 500.
/// </summary>
/// <param name="PreDecimal">The number above the decimal. For 1.5, this would be 1.</param>
/// <param name="PostDecimal">The number below the decimal, to three digits.
/// For 1.5, this would be 500. For 1.005, this would be 5.</param>
/// <returns>A fixed-int representation of the number parts</returns>
public static FInt FromParts( int PreDecimal, int PostDecimal )
{
FInt f = FInt.Create( PreDecimal, true );
if ( PostDecimal != 0 )
f.RawValue += ( FInt.Create( PostDecimal ) / 1000 ).RawValue;

return f;
}
#endregion

#region *
public static FInt operator *( FInt one, FInt other )
{
FInt fInt;
fInt.RawValue = ( one.RawValue * other.RawValue ) >> SHIFT_AMOUNT;
return fInt;
}

public static FInt operator *( FInt one, int multi )
{
return one * (FInt)multi;
}

public static FInt operator *( int multi, FInt one )
{
return one * (FInt)multi;
}
#endregion

#region /
public static FInt operator /( FInt one, FInt other )
{
FInt fInt;
fInt.RawValue = ( one.RawValue << SHIFT_AMOUNT ) / ( other.RawValue );
return fInt;
}

public static FInt operator /( FInt one, int divisor )
{
return one / (FInt)divisor;
}

public static FInt operator /( int divisor, FInt one )
{
return (FInt)divisor / one;
}
#endregion

#region %
public static FInt operator %( FInt one, FInt other )
{
FInt fInt;
fInt.RawValue = ( one.RawValue ) % ( other.RawValue );
return fInt;
}

public static FInt operator %( FInt one, int divisor )
{
return one % (FInt)divisor;
}

public static FInt operator %( int divisor, FInt one )
{
return (FInt)divisor % one;
}
#endregion

#region +
public static FInt operator +( FInt one, FInt other )
{
FInt fInt;
fInt.RawValue = one.RawValue + other.RawValue;
return fInt;
}

public static FInt operator +( FInt one, int other )
{
return one + (FInt)other;
}

public static FInt operator +( int other, FInt one )
{
return one + (FInt)other;
}
#endregion

#region -
public static FInt operator -( FInt one, FInt other )
{
FInt fInt;
fInt.RawValue = one.RawValue - other.RawValue;
return fInt;
}

public static FInt operator -( FInt one, int other )
{
return one - (FInt)other;
}

public static FInt operator -( int other, FInt one )
{
return (FInt)other - one;
}
#endregion

#region ==
public static bool operator ==( FInt one, FInt other )
{
return one.RawValue == other.RawValue;
}

public static bool operator ==( FInt one, int other )
{
return one == (FInt)other;
}

public static bool operator ==( int other, FInt one )
{
return (FInt)other == one;
}
#endregion

#region !=
public static bool operator !=( FInt one, FInt other )
{
return one.RawValue != other.RawValue;
}

public static bool operator !=( FInt one, int other )
{
return one != (FInt)other;
}

public static bool operator !=( int other, FInt one )
{
return (FInt)other != one;
}
#endregion

#region >=
public static bool operator >=( FInt one, FInt other )
{
return one.RawValue >= other.RawValue;
}

public static bool operator >=( FInt one, int other )
{
return one >= (FInt)other;
}

public static bool operator >=( int other, FInt one )
{
return (FInt)other >= one;
}
#endregion

#region <=
public static bool operator <=( FInt one, FInt other )
{
return one.RawValue <= other.RawValue;
}

public static bool operator <=( FInt one, int other )
{
return one <= (FInt)other;
}

public static bool operator <=( int other, FInt one )
{
return (FInt)other <= one;
}
#endregion

#region >
public static bool operator >( FInt one, FInt other )
{
return one.RawValue > other.RawValue;
}

public static bool operator >( FInt one, int other )
{
return one > (FInt)other;
}

public static bool operator >( int other, FInt one )
{
return (FInt)other > one;
}
#endregion

#region <
public static bool operator <( FInt one, FInt other )
{
return one.RawValue < other.RawValue;
}

public static bool operator <( FInt one, int other )
{
return one < (FInt)other;
}

public static bool operator <( int other, FInt one )
{
return (FInt)other < one;
}
#endregion

public static explicit operator int( FInt src )
{
return (int)( src.RawValue >> SHIFT_AMOUNT );
}

public static explicit operator FInt( int src )
{
return FInt.Create( src, true );
}

public static explicit operator FInt( long src )
{
return FInt.Create( src, true );
}

public static explicit operator FInt( ulong src )
{
return FInt.Create( (long)src, true );
}

public static FInt operator <<( FInt one, int Amount )
{
return FInt.Create( one.RawValue << Amount, false );
}

public static FInt operator >>( FInt one, int Amount )
{
return FInt.Create( one.RawValue >> Amount, false );
}

public override bool Equals( object obj )
{
if ( obj is FInt )
return ( (FInt)obj ).RawValue == this.RawValue;
else
return false;
}

public override int GetHashCode()
{
return RawValue.GetHashCode();
}

public override string ToString()
{
return this.RawValue.ToString();
}
}

public struct FPoint
{
public FInt X;
public FInt Y;

public static FPoint Create( FInt X, FInt Y )
{
FPoint fp;
fp.X = X;
fp.Y = Y;
return fp;
}

public static FPoint FromPoint( Point p )
{
FPoint f;
f.X = (FInt)p.X;
f.Y = (FInt)p.Y;
return f;
}

public static Point ToPoint( FPoint f )
{
return new Point( f.X.IntValue, f.Y.IntValue );
}

#region Vector Operations
public static FPoint VectorAdd( FPoint F1, FPoint F2 )
{
FPoint result;
result.X = F1.X + F2.X;
result.Y = F1.Y + F2.Y;
return result;
}

public static FPoint VectorSubtract( FPoint F1, FPoint F2 )
{
FPoint result;
result.X = F1.X - F2.X;
result.Y = F1.Y - F2.Y;
return result;
}

public static FPoint VectorDivide( FPoint F1, int Divisor )
{
FPoint result;
result.X = F1.X / Divisor;
result.Y = F1.Y / Divisor;
return result;
}
#endregion
}

Based on the comments from ShuggyCoUk, I see that this is in Q12 format. That's reasonably precise for my purposes. Of course, aside from the bugfixes, I already had this basic format before I asked my question. What I was looking for were ways to calculate Sqrt, Atan2, Sin, and Cos in C# using a structure like this. There aren't any other things that I know of in C# that will handle this, but in Java I managed to find the MathFP library by Onno Hommes. It's a liberal source software license, so I've converted some of his functions to my purposes in C# (with a fix to atan2, I think). Enjoy:

    #region PI, DoublePI
public static FInt PI = FInt.Create( 12868, false ); //PI x 2^12
public static FInt TwoPIF = PI * 2; //radian equivalent of 260 degrees
public static FInt PIOver180F = PI / (FInt)180; //PI / 180
#endregion

#region Sqrt
public static FInt Sqrt( FInt f, int NumberOfIterations )
{
if ( f.RawValue < 0 ) //NaN in Math.Sqrt
throw new ArithmeticException( "Input Error" );
if ( f.RawValue == 0 )
return (FInt)0;
FInt k = f + FInt.OneF >> 1;
for ( int i = 0; i < NumberOfIterations; i++ )
k = ( k + ( f / k ) ) >> 1;

if ( k.RawValue < 0 )
throw new ArithmeticException( "Overflow" );
else
return k;
}

public static FInt Sqrt( FInt f )
{
byte numberOfIterations = 8;
if ( f.RawValue > 0x64000 )
numberOfIterations = 12;
if ( f.RawValue > 0x3e8000 )
numberOfIterations = 16;
return Sqrt( f, numberOfIterations );
}
#endregion

#region Sin
public static FInt Sin( FInt i )
{
FInt j = (FInt)0;
for ( ; i < 0; i += FInt.Create( 25736, false ) ) ;
if ( i > FInt.Create( 25736, false ) )
i %= FInt.Create( 25736, false );
FInt k = ( i * FInt.Create( 10, false ) ) / FInt.Create( 714, false );
if ( i != 0 && i != FInt.Create( 6434, false ) && i != FInt.Create( 12868, false ) &&
i != FInt.Create( 19302, false ) && i != FInt.Create( 25736, false ) )
j = ( i * FInt.Create( 100, false ) ) / FInt.Create( 714, false ) - k * FInt.Create( 10, false );
if ( k <= FInt.Create( 90, false ) )
return sin_lookup( k, j );
if ( k <= FInt.Create( 180, false ) )
return sin_lookup( FInt.Create( 180, false ) - k, j );
if ( k <= FInt.Create( 270, false ) )
return sin_lookup( k - FInt.Create( 180, false ), j ).Inverse;
else
return sin_lookup( FInt.Create( 360, false ) - k, j ).Inverse;
}

private static FInt sin_lookup( FInt i, FInt j )
{
if ( j > 0 && j < FInt.Create( 10, false ) && i < FInt.Create( 90, false ) )
return FInt.Create( SIN_TABLE[i.RawValue], false ) +
( ( FInt.Create( SIN_TABLE[i.RawValue + 1], false ) - FInt.Create( SIN_TABLE[i.RawValue], false ) ) /
FInt.Create( 10, false ) ) * j;
else
return FInt.Create( SIN_TABLE[i.RawValue], false );
}

private static int[] SIN_TABLE = {
0, 71, 142, 214, 285, 357, 428, 499, 570, 641,
711, 781, 851, 921, 990, 1060, 1128, 1197, 1265, 1333,
1400, 1468, 1534, 1600, 1665, 1730, 1795, 1859, 1922, 1985,
2048, 2109, 2170, 2230, 2290, 2349, 2407, 2464, 2521, 2577,
2632, 2686, 2740, 2793, 2845, 2896, 2946, 2995, 3043, 3091,
3137, 3183, 3227, 3271, 3313, 3355, 3395, 3434, 3473, 3510,
3547, 3582, 3616, 3649, 3681, 3712, 3741, 3770, 3797, 3823,
3849, 3872, 3895, 3917, 3937, 3956, 3974, 3991, 4006, 4020,
4033, 4045, 4056, 4065, 4073, 4080, 4086, 4090, 4093, 4095,
4096
};
#endregion

private static FInt mul( FInt F1, FInt F2 )
{
return F1 * F2;
}

#region Cos, Tan, Asin
public static FInt Cos( FInt i )
{
return Sin( i + FInt.Create( 6435, false ) );
}

public static FInt Tan( FInt i )
{
return Sin( i ) / Cos( i );
}

public static FInt Asin( FInt F )
{
bool isNegative = F < 0;
F = Abs( F );

if ( F > FInt.OneF )
throw new ArithmeticException( "Bad Asin Input:" + F.ToDouble() );

FInt f1 = mul( mul( mul( mul( FInt.Create( 145103 >> FInt.SHIFT_AMOUNT, false ), F ) -
FInt.Create( 599880 >> FInt.SHIFT_AMOUNT, false ), F ) +
FInt.Create( 1420468 >> FInt.SHIFT_AMOUNT, false ), F ) -
FInt.Create( 3592413 >> FInt.SHIFT_AMOUNT, false ), F ) +
FInt.Create( 26353447 >> FInt.SHIFT_AMOUNT, false );
FInt f2 = PI / FInt.Create( 2, true ) - ( Sqrt( FInt.OneF - F ) * f1 );

return isNegative ? f2.Inverse : f2;
}
#endregion

#region ATan, ATan2
public static FInt Atan( FInt F )
{
return Asin( F / Sqrt( FInt.OneF + ( F * F ) ) );
}

public static FInt Atan2( FInt F1, FInt F2 )
{
if ( F2.RawValue == 0 && F1.RawValue == 0 )
return (FInt)0;

FInt result = (FInt)0;
if ( F2 > 0 )
result = Atan( F1 / F2 );
else if ( F2 < 0 )
{
if ( F1 >= 0 )
result = ( PI - Atan( Abs( F1 / F2 ) ) );
else
result = ( PI - Atan( Abs( F1 / F2 ) ) ).Inverse;
}
else
result = ( F1 >= 0 ? PI : PI.Inverse ) / FInt.Create( 2, true );

return result;
}
#endregion

#region Abs
public static FInt Abs( FInt F )
{
if ( F < 0 )
return F.Inverse;
else
return F;
}
#endregion

There are a number of other functions in Dr. Hommes' MathFP library, but they were beyond what I needed, and so I have not taken the time to translate them to C# (that process was made extra difficult by the fact that he was using a long, and I am using the FInt struct, which makes the conversion rules are a bit challenging to see immediately).

The accuracy of these functions as they are coded here is more than enough for my purposes, but if you need more you can increase the SHIFT AMOUNT on FInt. Just be aware that if you do so, the constants on Dr. Hommes' functions will then need to be divided by 4096 and then multiplied by whatever your new SHIFT AMOUNT requires. You're likely to run into some bugs if you do that and aren't careful, so be sure to run checks against the built-in Math functions to make sure that your results aren't being put off by incorrectly adjusting a constant.

So far, this FInt logic seems as fast, if not perhaps a bit faster, than the equivalent built in .NET functions. That would obviously vary by machine, since the floating point coprocessor would determine that, so I have not run specific benchmarks. But they are integrated into my game now, and I've seen a slight decrease in processor utilization compared to before (this is on a Q6600 quad core -- about a 1% drop in usage on average).

.NET - Why is there no fixed point numeric data type in C#?

Decimal (base-10 floating point) was deemed to be good enough.

Should I Stick to 32bit or 64bit Types for Fixed-Point Number Class?

The type you use is irrelevant, with regards to the platform, as types are types are types, in C#. In other words, a long is always 64 bits, no matter what platform you're on. It's a guarantee of C#.

However, the real problem is going to be precision and scale.. When doing fixed-point math, you're going to have to pick the precision you want to use. That's an easy problem. What's not easy is scaling. If you have numbers that will exceed the maximum value of your chosen type (don't forget to include the decimals in this consideration), then you're broken out of the gate.

Have you looked into the decimal type?

decimal is still a floating-point type, but it is floating-point decimal, rather than IEEE754 binary floating-point, and thus is capable of representing any base-10 number you throw at it, so long as it fits in the scale and precision of the decimal type.

See these links for information on the decimal type:

  • C# In Depth:Decimal
  • MSDN C# decimal reference

decimal comes with some performance considerations, however, and may not be the best choice for a game, if performance is critical. For a simple 2D scroller, you'd be fine, but it's probably not ideal for anything beyond that.

Is floating-point math consistent in C#? Can it be?

I know of no way to way to make normal floating points deterministic in .net. The JITter is allowed to create code that behaves differently on different platforms(or between different versions of .net). So using normal floats in deterministic .net code is not possible.

The workarounds I considered:

  1. Implement FixedPoint32 in C#. While this is not too hard(I have a half finished implementation) the very small range of values makes it annoying to use. You have to be careful at all times so you neither overflow, nor lose too much precision. In the end I found this not easier than using integers directly.
  2. Implement FixedPoint64 in C#. I found this rather hard to do. For some operations intermediate integers of 128bit would be useful. But .net doesn't offer such a type.
  3. Implement a custom 32 bit floatingpoint. The lack of a BitScanReverse intrinsic causes a few annoyances when implementing this. But currently I think this is the most promising path.
  4. Use native code for the math operations. Incurs the overhead of a delegate call on every math operation.

I've just started a software implementation of 32 bit floating point math. It can do about 70million additions/multiplications per second on my 2.66GHz i3.
https://github.com/CodesInChaos/SoftFloat . Obviously it's still very incomplete and buggy.

C# signed fixed point to floating point conversion

This might be slightly more efficient, but I can't see it making much difference:

static double ByteArrayToTemp(byte[] data)
{
if (BitConverter.IsLittleEndian)
Array.Reverse(data);

ushort bits = BitConverter.ToUInt16(data, 0);

double scale = 1 << 6;
double result = 0;

for (int i = 0, bit = 1 << 14; i < 15; ++i, bit >>= 1, scale /= 2)
{
if ((bits & bit) != 0)
result += scale;
}

if ((bits & 0x8000) != 0)
result = -result;

return result;
}

You're not going to be able to avoid a loop when calculating this.

Detecting overflow in fixed-point multiplication

This took me a long time, but I eventually figured everything out. This code is tested to work for every possible combination of x and y in the range allowed by sbyte. Here is the commented code:

    static sbyte AddOverflowHelper(sbyte x, sbyte y, ref bool overflow) {
var sum = (sbyte)(x + y);
// x + y overflows if sign(x) ^ sign(y) != sign(sum)
overflow |= ((x ^ y ^ sum) & sbyte.MinValue) != 0;
return sum;
}

/// <summary>
/// Multiplies two Fix8 numbers.
/// Deals with overflow by saturation.
/// </summary>
public static Fix8 operator *(Fix8 x, Fix8 y) {
// Using the cross-multiplication algorithm, for learning purposes.
// It would be both trivial and much faster to use an Int16, but this technique
// won't work for a Fix64, since there's no Int128 or equivalent (and BigInteger is too slow).

sbyte xl = x.m_rawValue;
sbyte yl = y.m_rawValue;

byte xlo = (byte)(xl & 0x0F);
sbyte xhi = (sbyte)(xl >> 4);
byte ylo = (byte)(yl & 0x0F);
sbyte yhi = (sbyte)(yl >> 4);

byte lolo = (byte)(xlo * ylo);
sbyte lohi = (sbyte)((sbyte)xlo * yhi);
sbyte hilo = (sbyte)(xhi * (sbyte)ylo);
sbyte hihi = (sbyte)(xhi * yhi);

byte loResult = (byte)(lolo >> 4);
sbyte midResult1 = lohi;
sbyte midResult2 = hilo;
sbyte hiResult = (sbyte)(hihi << 4);

bool overflow = false;
// Check for overflow at each step of the sum, if it happens overflow will be true
sbyte sum = AddOverflowHelper((sbyte)loResult, midResult1, ref overflow);
sum = AddOverflowHelper(sum, midResult2, ref overflow);
sum = AddOverflowHelper(sum, hiResult, ref overflow);

bool opSignsEqual = ((xl ^ yl) & sbyte.MinValue) == 0;

// if signs of operands are equal and sign of result is negative,
// then multiplication overflowed positively
// the reverse is also true
if (opSignsEqual) {
if (sum < 0 || (overflow && xl > 0)) {
return MaxValue;
}
}
else {
if (sum > 0) {
return MinValue;
}
// If signs differ, both operands' magnitudes are greater than 1,
// and the result is greater than the negative operand, then there was negative overflow.
sbyte posOp, negOp;
if (xl > yl) {
posOp = xl;
negOp = yl;
}
else {
posOp = yl;
negOp = xl;
}
if (sum > negOp && negOp < -(1 << 4) && posOp > (1 << 4)) {
return MinValue;
}
}

// if the top 4 bits of hihi (unused in the result) are neither all 0s nor 1s,
// then this means the result overflowed.
sbyte topCarry = (sbyte)(hihi >> 4);
// -17 (-1.0625) is a problematic value which never causes overflow but messes up the carry bits
if (topCarry != 0 && topCarry != -1 && xl != -17 && yl != -17) {
return opSignsEqual ? MaxValue : MinValue;
}

// Round up if necessary, but don't overflow
var lowCarry = (byte)(lolo << 4);
if (lowCarry >= 0x80 && sum < sbyte.MaxValue) {
++sum;
}

return new Fix8(sum);
}

I'm putting all this together into a properly unit tested fixed-point math library for .NET, which will be available here: https://github.com/asik/FixedMath.Net

C# fixed point to floating point

I guess it's a

public struct FIXED
{
public short fract;
public short value;
}

that you want to convert to floating point. Such fixed-point numbers can be converted like this

var fix = new FIXED { value = 42, fract = 16384 };
double floating = fix.value + (double)fix.fract / 65536;

I divide by 65536 because a short is 16 bits (2^16). It's actually kind of strange that's it a short and not a ushort since a fraction can't be negative.



Related Topics



Leave a reply



Submit