= 0.1 + (0.3 + 0.6)
x = (0.1 + 0.3) + 0.6
y
print(x, y)
0.9999999999999999 1.0
Stephen J. Mildenhall
2022-10-06
I studied math(s) for 9 years as a undergraduate and graduate student. I never came across a non-associative operator. Standard IEEE floating point addition is non-associative, as the next example shows.
FWIW, after 1 week working as an actuary, I found another: multiple and then round—a process used in determining insurance premiums from a rating manual where the “rate order calculation” had to be specified to regulators.
int.bit_length()
float.as_integer_ratio()
float.hex()
float.from_hex()
struct.pack
math.fsum # sum tracking rounding errors
math.frexp() # gives mantissa and exponent
f'{x:.55f}' # works surprisingly well.
The float is \(f = J/2^N\) where the bit length of \(J\) is at most 53 (see second ref below). This means \(2***52 \le f 2^N < 2**53\).
from math import gcd
def decimal_fraction(x):
sx = str(x)
# no scientific notation for now (small x only)
assert 'e' not in sx
# how many decimal places
fx = len(sx[sx.find('.')+1:])
# numerator and denominator as decimal fraction
d = 10**fx
n = int(d*x)
g = gcd(n, d)
# return simplified
return (n // g, d // g)
def exact_float(f):
"""
Check to see if the internal double float representation of f is exact.
f is written as a decimal fraction n / d
pn, pd = f.as_integer_ratio() gives the binary fraction representation
The representation is exact if n / d = pn / pd, i.e., if n pd == d pn.
The error is n/d - pn/pd = (n pd - d pn) / (d pd)
"""
n, d = decimal_fraction(f)
pn, pd = f.as_integer_ratio()
exact = (n * pd == d * pn)
pde = np.log(pd) / np.log(2)
err = d * pn - n * pd
# error as a fraction
fr_err = err * 10**17 / (d * pd)
return exact, err, fr_err, pd
Wikipedia says double is [sign][11 bits of exponent, stored as exp-1023][52 bits of fractional part b51 b50… b0] = +/- (1.b51….b0) x 2**(exp-1023)
from struct import pack
def binary2(f):
# get 8 byte rep, convert to left padded binaries and concatentate
# bin_rep is the 64 bit representation
bin_rep = ''.join([f'{i:08b}' for i in pack('!d', f)])
nice_bin_rep = ' # '. join([bin_rep[0], bin_rep[1:12], bin_rep[12:]])
# sign | 11 bits | 52 bits split into parts
sign, exponent_plus_1023, mantissa = bin_rep[0], bin_rep[1:12], bin_rep[12:]
# convert to integers
s, e, m = [int(i, 2) for i in [sign, exponent_plus_1023, mantissa]]
# make m < 1, divide by 2**52 and add the implicit 1
m_10 = m / 2**52
# unwind exponent
e = e - 1023
# sign
s = '-' if s else '+'
# answer
check = (1 + m_10) * 2**e
return pd.DataFrame([s, m, m_10, 1+m_10, e, check, nice_bin_rep],
index=['sign', 'mantissa', 'mantissa_10', '1+mantissa_10', 'exponent', 'check', 'binary'],
columns=[f'{f}'])
Relevant webpages
From interesting but not perfect
def binary(number: float):
"""Print a number's floating point representation."""
packed = struct.pack("!d", float(number))
integers = [c for c in packed]
binaries = [bin(i) for i in integers]
stripped_binaries = [s.replace("0b", "") for s in binaries]
padded = [s.rjust(8, "0") for s in stripped_binaries]
final = "".join(padded)
sign, exponent_plus_1023, mantissa = final[0], final[1:12], final[12:]
sign_str = "" if int(sign) == 0 else "-"
mantissa_base10 = (int(mantissa, 2) / 2 ** 52) # shift decimal point from end of binary string to beginning of it
mantissa_base10_str = str(mantissa_base10)[2:] # throw away the leading "0."
mantissa_index = mantissa.rfind("1")
mantissa_index = 0 if mantissa_index == -1 else mantissa_index
exponent_base10 = int(exponent_plus_1023, 2) - 1023
print(f" Decimal: {sign_str}1.{mantissa_base10_str} x 2^{exponent_base10}")
print(f" Binary: {sign_str}1.{mantissa[:mantissa_index + 1]} x 2^{exponent_base10:b}n")
print(f" Sign: {sign} ({'+' if sign == '0' else '-'})")
print(f"Mantissa: {mantissa[:mantissa_index + 1]} ({mantissa_base10})")
print(f"Exponent: {exponent_base10:b} ({exponent_base10})")