diff -r 372d3611c693 -r e89a7cf22da3 SConstruct
--- a/SConstruct	Thu Sep 19 17:55:04 2013 +0200
+++ b/SConstruct	Thu Sep 19 18:09:15 2013 +0200
@@ -545,6 +545,9 @@
     # Add selected sanity checks from -Wextra
     main.Append(CXXFLAGS=['-Wmissing-field-initializers',
                           '-Woverloaded-virtual'])
+    # Some standard library versions require constant macros to be
+    # enabled explicitly when using C++.
+    main.Append(CCFLAGS=['-D__STDC_CONSTANT_MACROS'])
 else:
     print termcap.Yellow + termcap.Bold + 'Error' + termcap.Normal,
     print "Don't know what compiler options to use for your compiler."
@@ -1109,6 +1112,10 @@
 main.SConscript('ext/libfdt/SConscript',
                 variant_dir = joinpath(build_root, 'libfdt'))
 
+# softfloat build is shared across all configs in the build root.
+main.SConscript('ext/softfloat/SConscript',
+                variant_dir = joinpath(build_root, 'softfloat'))
+
 ###################################################
 #
 # This function is used to set up a directory with switching headers
diff -r 372d3611c693 -r e89a7cf22da3 ext/softfloat/SConscript
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ext/softfloat/SConscript	Thu Sep 19 18:09:15 2013 +0200
@@ -0,0 +1,47 @@
+# -*- mode:python -*-
+
+# Copyright (c) 2013 Andreas Sandberg
+# All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions are
+# met: redistributions of source code must retain the above copyright
+# notice, this list of conditions and the following disclaimer;
+# redistributions in binary form must reproduce the above copyright
+# notice, this list of conditions and the following disclaimer in the
+# documentation and/or other materials provided with the distribution;
+# neither the name of the copyright holders nor the names of its
+# contributors may be used to endorse or promote products derived from
+# this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+#
+# Authors: Andreas Sandberg
+
+Import('main')
+# Source files that are compiled into the softfloat library.
+softfloat_files = [
+    'softfloat.cc',
+    'softfloatx80.cc',
+    'softfloat16.cc',
+    'softfloat-specialize.cc',
+    'softfloat-round-pack.cc',
+    'softfloat-muladd.cc',
+]
+# Build the 'softfloat' library from one shared object per source file.
+main.Library('softfloat', [main.SharedObject(f) for f in softfloat_files])
+# Add this directory to the include and library paths and link 'softfloat'.
+main.Prepend(CPPPATH=Dir('.'))
+main.Append(LIBS=['softfloat'])
+main.Prepend(LIBPATH=[Dir('.')])
+
diff -r 372d3611c693 -r e89a7cf22da3 ext/softfloat/softfloat-compare.hh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ext/softfloat/softfloat-compare.hh	Thu Sep 19 18:09:15 2013 +0200
@@ -0,0 +1,500 @@
+/*============================================================================
+This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#ifndef _SOFTFLOAT_COMPARE_H_
+#define _SOFTFLOAT_COMPARE_H_
+
+#include "softfloat.hh"
+
+BEGIN_SOFTFLOAT_NS
+
+// ======= float32 ======= //
+// Function-pointer type whose signature matches the float32 predicates below.
+typedef int (*float32_compare_method)(float32, float32, float_status_t &status);
+
+// 0x00: 1 iff float32_compare_quiet() reports "equal".
+inline int float32_eq_ordered_quiet(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel == float_relation_equal;
+}
+
+// 0x01: 1 iff float32_compare() reports "less".
+inline int float32_lt_ordered_signalling(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare(a, b, status);
+   return rel == float_relation_less;
+}
+
+// 0x02: 1 iff float32_compare() reports "less" or "equal".
+inline int float32_le_ordered_signalling(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare(a, b, status);
+   return rel == float_relation_equal || rel == float_relation_less;
+}
+
+// 0x03: 1 iff float32_compare_quiet() reports "unordered".
+inline int float32_unordered_quiet(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel == float_relation_unordered;
+}
+
+// 0x04: 1 unless float32_compare_quiet() reports "equal".
+inline int float32_neq_unordered_quiet(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return !(rel == float_relation_equal);
+}
+
+// 0x05: 1 unless float32_compare() reports "less".
+inline int float32_nlt_unordered_signalling(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare(a, b, status);
+   return !(rel == float_relation_less);
+}
+
+// 0x06: 1 unless float32_compare() reports "less" or "equal".
+inline int float32_nle_unordered_signalling(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare(a, b, status);
+   return !(rel == float_relation_less || rel == float_relation_equal);
+}
+
+// 0x07: 1 unless float32_compare_quiet() reports "unordered".
+inline int float32_ordered_quiet(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return !(rel == float_relation_unordered);
+}
+
+// 0x08: 1 iff float32_compare_quiet() reports "equal" or "unordered".
+inline int float32_eq_unordered_quiet(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel == float_relation_unordered || rel == float_relation_equal;
+}
+
+// 0x09: 1 iff float32_compare() reports "less" or "unordered".
+inline int float32_nge_unordered_signalling(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare(a, b, status);
+   return rel == float_relation_unordered || rel == float_relation_less;
+}
+
+// 0x0a: 1 unless float32_compare() reports "greater".
+inline int float32_ngt_unordered_signalling(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare(a, b, status);
+   return !(rel == float_relation_greater);
+}
+
+// 0x0b: always 0; float32_compare_quiet() is still called with `status'.
+inline int float32_false_quiet(float32 a, float32 b, float_status_t &status)
+{
+   (void) float32_compare_quiet(a, b, status);   // result unused
+   return 0;
+}
+
+// 0x0c: 1 unless float32_compare_quiet() reports "equal" or "unordered".
+inline int float32_neq_ordered_quiet(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return !(rel == float_relation_equal || rel == float_relation_unordered);
+}
+
+// 0x0d: 1 iff float32_compare() reports "greater" or "equal".
+inline int float32_ge_ordered_signalling(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare(a, b, status);
+   return rel == float_relation_equal || rel == float_relation_greater;
+}
+
+// 0x0e: 1 iff float32_compare() reports "greater".
+inline int float32_gt_ordered_signalling(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare(a, b, status);
+   return rel == float_relation_greater;
+}
+
+// 0x0f: always 1; float32_compare_quiet() is still called with `status'.
+inline int float32_true_quiet(float32 a, float32 b, float_status_t &status)
+{
+   (void) float32_compare_quiet(a, b, status);   // result unused
+   return 1;
+}
+
+// 0x10: 1 iff float32_compare() reports "equal".
+inline int float32_eq_ordered_signalling(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare(a, b, status);
+   return rel == float_relation_equal;
+}
+
+// 0x11: 1 iff float32_compare_quiet() reports "less".
+inline int float32_lt_ordered_quiet(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel == float_relation_less;
+}
+
+// 0x12: 1 iff float32_compare_quiet() reports "less" or "equal".
+inline int float32_le_ordered_quiet(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel == float_relation_equal || rel == float_relation_less;
+}
+
+// 0x13: 1 iff float32_compare() reports "unordered".
+inline int float32_unordered_signalling(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare(a, b, status);
+   return rel == float_relation_unordered;
+}
+
+// 0x14: 1 unless float32_compare() reports "equal".
+inline int float32_neq_unordered_signalling(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare(a, b, status);
+   return !(rel == float_relation_equal);
+}
+
+// 0x15: 1 unless float32_compare_quiet() reports "less".
+inline int float32_nlt_unordered_quiet(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return !(rel == float_relation_less);
+}
+
+// 0x16: 1 unless float32_compare_quiet() reports "less" or "equal".
+inline int float32_nle_unordered_quiet(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return !(rel == float_relation_less || rel == float_relation_equal);
+}
+
+// 0x17: 1 unless float32_compare() reports "unordered".
+inline int float32_ordered_signalling(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare(a, b, status);
+   return !(rel == float_relation_unordered);
+}
+
+// 0x18: 1 iff float32_compare() reports "equal" or "unordered".
+inline int float32_eq_unordered_signalling(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare(a, b, status);
+   return rel == float_relation_unordered || rel == float_relation_equal;
+}
+
+// 0x19: 1 iff float32_compare_quiet() reports "less" or "unordered".
+inline int float32_nge_unordered_quiet(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel == float_relation_unordered || rel == float_relation_less;
+}
+
+// 0x1a: 1 unless float32_compare_quiet() reports "greater".
+inline int float32_ngt_unordered_quiet(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return !(rel == float_relation_greater);
+}
+
+// 0x1b: always 0; float32_compare() is still called with `status'.
+inline int float32_false_signalling(float32 a, float32 b, float_status_t &status)
+{
+   (void) float32_compare(a, b, status);   // result unused
+   return 0;
+}
+
+// 0x1c: 1 unless float32_compare() reports "equal" or "unordered".
+inline int float32_neq_ordered_signalling(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare(a, b, status);
+   return !(rel == float_relation_equal || rel == float_relation_unordered);
+}
+
+// 0x1d: 1 iff float32_compare_quiet() reports "greater" or "equal".
+inline int float32_ge_ordered_quiet(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel == float_relation_equal || rel == float_relation_greater;
+}
+
+// 0x1e: 1 iff float32_compare_quiet() reports "greater".
+inline int float32_gt_ordered_quiet(float32 a, float32 b, float_status_t &status)
+{
+   const int rel = float32_compare_quiet(a, b, status);
+   return rel == float_relation_greater;
+}
+
+// 0x1f: always 1; float32_compare() is still called with `status'.
+inline int float32_true_signalling(float32 a, float32 b, float_status_t &status)
+{
+   (void) float32_compare(a, b, status);   // result unused
+   return 1;
+}
+
+// ======= float64 ======= //
+// Function-pointer type whose signature matches the float64 predicates below.
+typedef int (*float64_compare_method)(float64, float64, float_status_t &status);
+
+// 0x00: 1 iff float64_compare_quiet() reports "equal".
+inline int float64_eq_ordered_quiet(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel == float_relation_equal;
+}
+
+// 0x01: 1 iff float64_compare() reports "less".
+inline int float64_lt_ordered_signalling(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare(a, b, status);
+   return rel == float_relation_less;
+}
+
+// 0x02: 1 iff float64_compare() reports "less" or "equal".
+inline int float64_le_ordered_signalling(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare(a, b, status);
+   return rel == float_relation_equal || rel == float_relation_less;
+}
+
+// 0x03: 1 iff float64_compare_quiet() reports "unordered".
+inline int float64_unordered_quiet(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel == float_relation_unordered;
+}
+
+// 0x04: 1 unless float64_compare_quiet() reports "equal".
+inline int float64_neq_unordered_quiet(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return !(rel == float_relation_equal);
+}
+
+// 0x05: 1 unless float64_compare() reports "less".
+inline int float64_nlt_unordered_signalling(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare(a, b, status);
+   return !(rel == float_relation_less);
+}
+
+// 0x06: 1 unless float64_compare() reports "less" or "equal".
+inline int float64_nle_unordered_signalling(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare(a, b, status);
+   return !(rel == float_relation_less || rel == float_relation_equal);
+}
+
+// 0x07: 1 unless float64_compare_quiet() reports "unordered".
+inline int float64_ordered_quiet(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return !(rel == float_relation_unordered);
+}
+
+// 0x08: 1 iff float64_compare_quiet() reports "equal" or "unordered".
+inline int float64_eq_unordered_quiet(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel == float_relation_unordered || rel == float_relation_equal;
+}
+
+// 0x09: 1 iff float64_compare() reports "less" or "unordered".
+inline int float64_nge_unordered_signalling(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare(a, b, status);
+   return rel == float_relation_unordered || rel == float_relation_less;
+}
+
+// 0x0a: 1 unless float64_compare() reports "greater".
+inline int float64_ngt_unordered_signalling(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare(a, b, status);
+   return !(rel == float_relation_greater);
+}
+
+// 0x0b: always 0; float64_compare_quiet() is still called with `status'.
+inline int float64_false_quiet(float64 a, float64 b, float_status_t &status)
+{
+   (void) float64_compare_quiet(a, b, status);   // result unused
+   return 0;
+}
+
+// 0x0c: 1 unless float64_compare_quiet() reports "equal" or "unordered".
+inline int float64_neq_ordered_quiet(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return !(rel == float_relation_equal || rel == float_relation_unordered);
+}
+
+// 0x0d: 1 iff float64_compare() reports "greater" or "equal".
+inline int float64_ge_ordered_signalling(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare(a, b, status);
+   return rel == float_relation_equal || rel == float_relation_greater;
+}
+
+// 0x0e: 1 iff float64_compare() reports "greater".
+inline int float64_gt_ordered_signalling(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare(a, b, status);
+   return rel == float_relation_greater;
+}
+
+// 0x0f: always 1; float64_compare_quiet() is still called with `status'.
+inline int float64_true_quiet(float64 a, float64 b, float_status_t &status)
+{
+   (void) float64_compare_quiet(a, b, status);   // result unused
+   return 1;
+}
+
+// 0x10: 1 iff float64_compare() reports "equal".
+inline int float64_eq_ordered_signalling(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare(a, b, status);
+   return rel == float_relation_equal;
+}
+
+// 0x11: 1 iff float64_compare_quiet() reports "less".
+inline int float64_lt_ordered_quiet(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel == float_relation_less;
+}
+
+// 0x12: 1 iff float64_compare_quiet() reports "less" or "equal".
+inline int float64_le_ordered_quiet(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel == float_relation_equal || rel == float_relation_less;
+}
+
+// 0x13: 1 iff float64_compare() reports "unordered".
+inline int float64_unordered_signalling(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare(a, b, status);
+   return rel == float_relation_unordered;
+}
+
+// 0x14: 1 unless float64_compare() reports "equal".
+inline int float64_neq_unordered_signalling(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare(a, b, status);
+   return !(rel == float_relation_equal);
+}
+
+// 0x15: 1 unless float64_compare_quiet() reports "less".
+inline int float64_nlt_unordered_quiet(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return !(rel == float_relation_less);
+}
+
+// 0x16: 1 unless float64_compare_quiet() reports "less" or "equal".
+inline int float64_nle_unordered_quiet(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return !(rel == float_relation_less || rel == float_relation_equal);
+}
+
+// 0x17: 1 unless float64_compare() reports "unordered".
+inline int float64_ordered_signalling(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare(a, b, status);
+   return !(rel == float_relation_unordered);
+}
+
+// 0x18: 1 iff float64_compare() reports "equal" or "unordered".
+inline int float64_eq_unordered_signalling(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare(a, b, status);
+   return rel == float_relation_unordered || rel == float_relation_equal;
+}
+
+// 0x19: 1 iff float64_compare_quiet() reports "less" or "unordered".
+inline int float64_nge_unordered_quiet(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel == float_relation_unordered || rel == float_relation_less;
+}
+
+// 0x1a: 1 unless float64_compare_quiet() reports "greater".
+inline int float64_ngt_unordered_quiet(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return !(rel == float_relation_greater);
+}
+
+// 0x1b: always 0; float64_compare() is still called with `status'.
+inline int float64_false_signalling(float64 a, float64 b, float_status_t &status)
+{
+   (void) float64_compare(a, b, status);   // result unused
+   return 0;
+}
+
+// 0x1c: 1 unless float64_compare() reports "equal" or "unordered".
+inline int float64_neq_ordered_signalling(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare(a, b, status);
+   return !(rel == float_relation_equal || rel == float_relation_unordered);
+}
+
+// 0x1d: 1 iff float64_compare_quiet() reports "greater" or "equal".
+inline int float64_ge_ordered_quiet(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel == float_relation_equal || rel == float_relation_greater;
+}
+
+// 0x1e: 1 iff float64_compare_quiet() reports "greater".
+inline int float64_gt_ordered_quiet(float64 a, float64 b, float_status_t &status)
+{
+   const int rel = float64_compare_quiet(a, b, status);
+   return rel == float_relation_greater;
+}
+
+// 0x1f: always 1; float64_compare() is still called with `status'.
+inline int float64_true_signalling(float64 a, float64 b, float_status_t &status)
+{
+   (void) float64_compare(a, b, status);   // result unused
+   return 1;
+}
+
+END_SOFTFLOAT_NS
+
+#endif
diff -r 372d3611c693 -r e89a7cf22da3 ext/softfloat/softfloat-macros.hh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ext/softfloat/softfloat-macros.hh	Thu Sep 19 18:09:15 2013 +0200
@@ -0,0 +1,690 @@
+/*============================================================================
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal notice) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#ifndef _SOFTFLOAT_MACROS_H_
+#define _SOFTFLOAT_MACROS_H_
+
+BEGIN_SOFTFLOAT_NS
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'.  If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1.  The value of `count'
+| can be arbitrarily large; in particular, if `count' is 16 or more, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+*----------------------------------------------------------------------------*/
+
+inline uint16_t shift16RightJamming(uint16_t a, int count)
+{
+    uint16_t z;
+    if (count == 0) {
+        z = a;
+    }
+    else if (count < 16) {
+        // `a' is promoted to int by `<<', so truncate back to 16 bits: only
+        // the bits really shifted off by `a>>count' may set the sticky bit.
+        z = (a>>count) | (((uint16_t) (a<<((-count) & 15))) != 0);
+    }
+    else {
+        z = (a != 0);
+    }
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'.  If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1.  The value of `count'
+| can be arbitrarily large; in particular, if `count' is greater than 32, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+*----------------------------------------------------------------------------*/
+
+inline uint32_t shift32RightJamming(uint32_t a, int count)
+{
+    // A zero count is the identity: nothing is shifted off.
+    if (count == 0) {
+        return a;
+    }
+    // Counts below the word width keep the surviving high bits and OR in a
+    // sticky 1 when any of the discarded low bits was set.
+    if (count < 32) {
+        const uint32_t discarded = a << ((-count) & 31);
+        return (a >> count) | (discarded != 0);
+    }
+
+    // The whole word is shifted off; only the sticky bit can remain.
+    return (a != 0);
+}
+
+/*----------------------------------------------------------------------------
+| Shifts `a' right by the number of bits given in `count'.  If any nonzero
+| bits are shifted off, they are ``jammed'' into the least significant bit of
+| the result by setting the least significant bit to 1.  The value of `count'
+| can be arbitrarily large; in particular, if `count' is greater than 64, the
+| result will be either 0 or 1, depending on whether `a' is zero or nonzero.
+*----------------------------------------------------------------------------*/
+
+inline uint64_t shift64RightJamming(uint64_t a, int count)
+{
+    uint64_t result = a;   // count == 0 leaves the operand unchanged
+
+    if (count >= 64) {
+        // every bit is shifted off; collapse to a lone sticky bit
+        result = (a != 0);
+    }
+    else if (count != 0) {
+        // keep the upper bits and jam any discarded low bits into bit 0
+        const uint64_t sticky = ((a << ((-count) & 63)) != 0);
+        result = (a >> count) | sticky;
+    }
+
+    return result;
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by 64
+| _plus_ the number of bits given in `count'.  The shifted result is at most
+| 64 nonzero bits; this is stored at the location pointed to by `z0Ptr'.  The
+| bits shifted off form a second 64-bit result as follows:  The _last_ bit
+| shifted off is the most-significant bit of the extra result, and the other
+| 63 bits of the extra result are all zero if and only if _all_but_the_last_
+| bits shifted off were all zero.  This extra result is stored in the location
+| pointed to by `z1Ptr'.  The value of `count' can be arbitrarily large.
+|     (This routine makes more sense if `a0' and `a1' are considered to form
+| a fixed-point value with binary point between `a0' and `a1'.  This fixed-
+| point value is shifted right by the number of bits given in `count', and
+| the integer part of the result is returned at the location pointed to by
+| `z0Ptr'.  The fractional part of the result may be slightly corrupted as
+| described above, and is returned at the location pointed to by `z1Ptr'.)
+*----------------------------------------------------------------------------*/
+
+inline void shift64ExtraRightJamming(uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
+{
+    uint64_t intPart = 0;   // becomes *z0Ptr
+    uint64_t extra;         // becomes *z1Ptr
+    if (count == 0) {
+        // nothing moves: both words pass straight through
+        intPart = a0;
+        extra = a1;
+    }
+    else if (count < 64) {
+        // low bits of a0 move to the top of the extra word; a1 is sticky
+        extra = (a0 << ((-count) & 63)) | (a1 != 0);
+        intPart = a0 >> count;
+    }
+    else if (count == 64) {
+        // a0 becomes the extra word in its entirety; a1 is sticky
+        extra = a0 | (a1 != 0);
+    }
+    else {
+        // everything is shifted past the extra word: sticky bit only
+        extra = ((a0 | a1) != 0);
+    }
+    *z1Ptr = extra;
+    *z0Ptr = intPart;
+}
+
+/*----------------------------------------------------------------------------
+| Adds the 128-bit value formed by concatenating `a0' and `a1' to the 128-bit
+| value formed by concatenating `b0' and `b1'.  Addition is modulo 2^128, so
+| any carry out is lost.  The result is broken into two 64-bit pieces which
+| are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+inline void add128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr)
+{
+    const uint64_t lowSum = a1 + b1;   // wraps modulo 2^64
+    *z1Ptr = lowSum;
+    *z0Ptr = a0 + b0 + (lowSum < a1 ? 1 : 0);   // carry out of the low word
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts the 128-bit value formed by concatenating `b0' and `b1' from the
+| 128-bit value formed by concatenating `a0' and `a1'.  Subtraction is modulo
+| 2^128, so any borrow out (carry out) is lost.  The result is broken into two
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr' and
+| `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+inline void sub128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1, uint64_t *z0Ptr, uint64_t *z1Ptr)
+{
+    const uint64_t borrow = (a1 < b1) ? 1 : 0;   // low word underflows
+    *z1Ptr = a1 - b1;
+    *z0Ptr = a0 - b0 - borrow;
+}
+
+/*----------------------------------------------------------------------------
+| Multiplies `a' by `b' to obtain a 128-bit product.  The product is broken
+| into two 64-bit pieces which are stored at the locations pointed to by
+| `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+inline void mul64To128(uint64_t a, uint64_t b, uint64_t *z0Ptr, uint64_t *z1Ptr)
+{
+    // Split each operand into 32-bit halves, widened so products are 64-bit.
+    const uint64_t aLo = (uint32_t) a;
+    const uint64_t aHi = (uint32_t) (a >> 32);
+    const uint64_t bLo = (uint32_t) b;
+    const uint64_t bHi = (uint32_t) (b >> 32);
+
+    // Four partial products of the schoolbook multiplication.
+    uint64_t low = aLo * bLo;
+    uint64_t high = aHi * bHi;
+    const uint64_t crossA = aHi * bLo;
+    uint64_t cross = aLo * bHi + crossA;
+    // A wrapped cross sum carries 2^32 into the high word.
+    high += (((uint64_t) (cross < crossA)) << 32) + (cross >> 32);
+    cross <<= 32;
+    low += cross;
+    high += (low < cross);   // carry out of the low word
+    *z1Ptr = low;
+    *z0Ptr = high;
+}
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the 64-bit integer quotient obtained by dividing
+| `b' into the 128-bit value formed by concatenating `a0' and `a1'.  The
+| divisor `b' must be at least 2^63.  If q is the exact quotient truncated
+| toward zero, the approximation returned lies between q and q + 2 inclusive.
+| If the exact quotient q is larger than 64 bits, the maximum positive 64-bit
+| unsigned integer is returned.
+*----------------------------------------------------------------------------*/
+
+#ifdef USE_estimateDiv128To64
+static uint64_t estimateDiv128To64(uint64_t a0, uint64_t a1, uint64_t b)
+{
+    uint64_t b0, b1;
+    uint64_t rem0, rem1, term0, term1;
+    uint64_t z;
+    // Quotient needs more than 64 bits when b <= a0: return the maximum value.
+    if (b <= a0) return UINT64_C(0xFFFFFFFFFFFFFFFF);
+    b0 = b>>32;  // upper 32 bits of the divisor
+    z = (b0<<32 <= a0) ? UINT64_C(0xFFFFFFFF00000000) : (a0 / b0)<<32;  // first guess at the upper quotient half
+    mul64To128(b, z, &term0, &term1);
+    sub128(a0, a1, term0, term1, &rem0, &rem1);  // remainder for that guess
+    while (((int64_t) rem0) < 0) {  // guess too large: lower it and add b<<32 back
+        z -= UINT64_C(0x100000000);
+        b1 = b<<32;
+        add128(rem0, rem1, b0, b1, &rem0, &rem1);
+    }
+    rem0 = (rem0<<32) | (rem1>>32);
+    z |= (b0<<32 <= rem0) ? 0xFFFFFFFF : rem0 / b0;  // fill in the lower quotient half
+    return z;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns an approximation to the square root of the 32-bit significand given
+| by `a'.  Considered as an integer, `a' must be at least 2^31.  If bit 0 of
+| `aExp' (the least significant bit) is 1, the integer returned approximates
+| 2^31*sqrt(`a'/2^31), where `a' is considered an integer.  If bit 0 of `aExp'
+| is 0, the integer returned approximates 2^31*sqrt(`a'/2^30).  In either
+| case, the approximation returned lies strictly within +/-2 of the exact
+| value.
+*----------------------------------------------------------------------------*/
+
+#ifdef USE_estimateSqrt32
+static uint32_t estimateSqrt32(int16_t aExp, uint32_t a)
+{
+    static const uint16_t sqrtOddAdjustments[] = {
+        0x0004, 0x0022, 0x005D, 0x00B1, 0x011D, 0x019F, 0x0236, 0x02E0,
+        0x039C, 0x0468, 0x0545, 0x0631, 0x072B, 0x0832, 0x0946, 0x0A67
+    };
+    static const uint16_t sqrtEvenAdjustments[] = {
+        0x0A2D, 0x08AF, 0x075A, 0x0629, 0x051A, 0x0429, 0x0356, 0x029E,
+        0x0200, 0x0179, 0x0109, 0x00AF, 0x0068, 0x0034, 0x0012, 0x0002
+    };
+    uint32_t z;
+    // Bits 30..27 of `a' select one of the 16 entries in the adjustment tables.
+    int index = (a>>27) & 15;
+    if (aExp & 1) {  // bit 0 of aExp set: use the odd-exponent table
+        z = 0x4000 + (a>>17) - sqrtOddAdjustments[index];
+        z = ((a / z)<<14) + (z<<15);
+        a >>= 1;
+    }
+    else {  // bit 0 of aExp clear: use the even-exponent table
+        z = 0x8000 + (a>>17) - sqrtEvenAdjustments[index];
+        z = a / z + z;
+        z = (0x20000 <= z) ? 0xFFFF8000 : (z<<15);
+        if (z <= a) return (uint32_t) (((int32_t) a)>>1);
+    }
+    return ((uint32_t) ((((uint64_t) a)<<31) / z)) + (z>>1);
+}
+#endif
+
+static const int countLeadingZeros8[] = {  // entry i = number of leading zero bits in the 8-bit value i
+  8, 7, 6, 6, 5, 5, 5, 5, 4, 4, 4, 4, 4, 4, 4, 4,
+  3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+  0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+#ifdef FLOAT16
+
+/*----------------------------------------------------------------------------
+| Counts the zero bits that precede the most-significant 1 bit of the 16-bit
+| value `a'.  A zero input yields 16.
+*----------------------------------------------------------------------------*/
+
+inline int countLeadingZeros16(uint16_t a)
+{
+    /* An empty high byte accounts for 8 zeros; the low byte then takes its
+     * place as the byte that is looked up in the 8-bit table. */
+    const int highByteEmpty = (a < 0x100);
+    const int skipped = highByteEmpty ? 8 : 0;
+    const int topByte = highByteEmpty ? a : (a>>8);
+
+    return skipped + countLeadingZeros8[topByte];
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Counts the zero bits that precede the most-significant 1 bit of the 32-bit
+| value `a'.  A zero input yields 32.
+*----------------------------------------------------------------------------*/
+
+inline int countLeadingZeros32(uint32_t a)
+{
+    int zeros = 0;
+
+    /* Move the highest non-zero byte up into bits 24..31, tallying the
+     * zero bits skipped on the way: first a half-word, then a byte. */
+    for (int step = 16; step >= 8; step >>= 1) {
+        if ((a >> (32 - step)) == 0) {
+            zeros += step;
+            a <<= step;
+        }
+    }
+    return zeros + countLeadingZeros8[a>>24];
+}
+
+/*----------------------------------------------------------------------------
+| Counts the zero bits that precede the most-significant 1 bit of the 64-bit
+| value `a'.  A zero input yields 64.
+*----------------------------------------------------------------------------*/
+
+inline int countLeadingZeros64(uint64_t a)
+{
+    const uint32_t upperHalf = (uint32_t)(a >> 32);
+
+    /* With an empty upper half all 32 of its bits are leading zeros and
+     * the count continues in the lower half. */
+    if (upperHalf == 0) {
+        return 32 + countLeadingZeros32((uint32_t)(a));
+    }
+    /* Otherwise the upper half alone determines the result. */
+    return countLeadingZeros32(upperHalf);
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+| number of bits given in `count'.  Any bits shifted off are lost.  The value
+| of `count' can be arbitrarily large; in particular, if `count' is greater
+| than 128, the result will be 0.  The result is broken into two 64-bit pieces
+| which are stored at the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+inline void shift128Right(uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
+{
+    uint64_t z0, z1;
+    int negCount = (-count) & 63;   /* 64 - count for 0 < count < 64 */
+
+    if (count == 0) {   /* kept apart: negCount is 0 here, so the general formula would OR all of a0 into z1 */
+        z1 = a1;
+        z0 = a0;
+    }
+    else if (count < 64) {   /* the low word also receives the bits that leave a0 */
+        z1 = (a0<<negCount) | (a1>>count);
+        z0 = a0>>count;
+    }
+    else {   /* count >= 64: a1 is shifted out completely and the high word becomes 0 */
+        z1 = (count < 128) ? (a0>>(count & 63)) : 0;   /* counts 64..127 keep the upper bits of a0 */
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+}
+
+/*----------------------------------------------------------------------------
+| Shifts the 128-bit value formed by concatenating `a0' and `a1' right by the
+| number of bits given in `count'.  If any nonzero bits are shifted off, they
+| are ``jammed'' into the least significant bit of the result by setting the
+| least significant bit to 1.  The value of `count' can be arbitrarily large;
+| in particular, if `count' is greater than 128, the result will be either
+| 0 or 1, depending on whether the concatenation of `a0' and `a1' is zero or
+| nonzero.  The result is broken into two 64-bit pieces which are stored at
+| the locations pointed to by `z0Ptr' and `z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+inline void shift128RightJamming(uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
+{
+    uint64_t z0, z1;
+    int negCount = (-count) & 63;   /* 64 - count for 0 < count < 64; 128 - count for 64 < count < 128 */
+
+    if (count == 0) {
+        z1 = a1;
+        z0 = a0;
+    }
+    else if (count < 64) {
+        z1 = (a0<<negCount) | (a1>>count) | ((a1<<negCount) != 0);   /* last term: sticky bit for the bits dropped from a1 */
+        z0 = a0>>count;
+    }
+    else {
+        if (count == 64) {
+            z1 = a0 | (a1 != 0);   /* all of a1 is dropped and survives only as the sticky bit */
+        }
+        else if (count < 128) {
+            z1 = (a0>>(count & 63)) | (((a0<<negCount) | a1) != 0);   /* sticky covers a1 and the dropped low bits of a0 */
+        }
+        else {
+            z1 = ((a0 | a1) != 0);   /* everything is dropped: the result is the sticky bit alone */
+        }
+        z0 = 0;
+    }
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+}
+
+/*----------------------------------------------------------------------------
+| Left-shifts the 128-bit value `a0':`a1' (`a0' is the high word) by `count'
+| bits, where `count' must be less than 64.  Bits shifted out of the top are
+| lost.  The high and low result words go to `*z0Ptr' and `*z1Ptr'.
+*----------------------------------------------------------------------------*/
+
+inline void shortShift128Left(uint64_t a0, uint64_t a1, int count, uint64_t *z0Ptr, uint64_t *z1Ptr)
+{
+    const uint64_t carried = (count != 0) ? (a1>>((-count) & 63)) : 0;   /* bits of a1 that move up into the high word */
+    *z1Ptr = a1<<count;
+    *z0Ptr = (a0<<count) | carried;
+}
+
+/*----------------------------------------------------------------------------
+| Adds the 192-bit value formed by concatenating `a0', `a1', and `a2' to the
+| 192-bit value formed by concatenating `b0', `b1', and `b2'.  Addition is
+| modulo 2^192, so any carry out is lost.  The result is broken into three
+| 64-bit pieces which are stored at the locations pointed to by `z0Ptr',
+| `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+inline void add192(
+     uint64_t a0,
+     uint64_t a1,
+     uint64_t a2,
+     uint64_t b0,
+     uint64_t b1,
+     uint64_t b2,
+     uint64_t *z0Ptr,
+     uint64_t *z1Ptr,
+     uint64_t *z2Ptr
+)
+{
+    /* Least-significant word first: if the sum wrapped around, one unit
+     * has to be carried into the middle word. */
+    const uint64_t low = a2 + b2;
+    const unsigned lowCarry = (low < a2);
+
+    /* The middle word can carry out at either of two points: when a1 and
+     * b1 are added, or when the carry from below is absorbed. */
+    const uint64_t midSum = a1 + b1;
+    const uint64_t mid = midSum + lowCarry;
+    const unsigned midCarry = (midSum < a1) + (mid < lowCarry);
+
+    *z2Ptr = low;
+    *z1Ptr = mid;
+    *z0Ptr = a0 + b0 + midCarry;   /* a carry out of the top word is dropped */
+}
+
+/*----------------------------------------------------------------------------
+| Subtracts the 192-bit value formed by concatenating `b0', `b1', and `b2'
+| from the 192-bit value formed by concatenating `a0', `a1', and `a2'.
+| Subtraction is modulo 2^192, so any borrow out (carry out) is lost.  The
+| result is broken into three 64-bit pieces which are stored at the locations
+| pointed to by `z0Ptr', `z1Ptr', and `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+inline void sub192(
+     uint64_t a0,
+     uint64_t a1,
+     uint64_t a2,
+     uint64_t b0,
+     uint64_t b1,
+     uint64_t b2,
+     uint64_t *z0Ptr,
+     uint64_t *z1Ptr,
+     uint64_t *z2Ptr
+)
+{
+    /* Least-significant word first: a2 < b2 means one unit has to be
+     * borrowed from the middle word. */
+    const uint64_t low = a2 - b2;
+    const unsigned lowBorrow = (a2 < b2);
+
+    /* The middle word can borrow from the top word at either of two
+     * points: in a1 - b1 itself, or when paying the borrow from below. */
+    const uint64_t midDiff = a1 - b1;
+    const unsigned midBorrow = (a1 < b1) + (midDiff < lowBorrow);
+    const uint64_t mid = midDiff - lowBorrow;
+
+    *z2Ptr = low;
+    *z1Ptr = mid;
+    *z0Ptr = a0 - b0 - midBorrow;   /* a borrow out of the top word is dropped */
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1'
+| is equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+inline int eq128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
+{
+    return ((a0 ^ b0) | (a1 ^ b1)) == 0;   /* equal when no bit differs in either word */
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+| than or equal to the 128-bit value formed by concatenating `b0' and `b1'.
+| Otherwise, returns 0.
+*----------------------------------------------------------------------------*/
+
+inline int le128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
+{
+    return (a0 != b0) ? (a0 < b0) : (a1 <= b1);   /* high words decide unless they tie */
+}
+
+/*----------------------------------------------------------------------------
+| Returns 1 if the 128-bit value formed by concatenating `a0' and `a1' is less
+| than the 128-bit value formed by concatenating `b0' and `b1'.  Otherwise,
+| returns 0.
+*----------------------------------------------------------------------------*/
+
+inline int lt128(uint64_t a0, uint64_t a1, uint64_t b0, uint64_t b1)
+{
+    return (a0 != b0) ? (a0 < b0) : (a1 < b1);   /* high words decide unless they tie */
+}
+
+#endif	/* FLOATX80 */
+
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a0' and `a1' by
+| `b' to obtain a 192-bit product.  The product is broken into three 64-bit
+| pieces which are stored at the locations pointed to by `z0Ptr', `z1Ptr', and
+| `z2Ptr'.
+*----------------------------------------------------------------------------*/
+
+inline void mul128By64To192(
+     uint64_t a0,
+     uint64_t a1,
+     uint64_t b,
+     uint64_t *z0Ptr,
+     uint64_t *z1Ptr,
+     uint64_t *z2Ptr
+)
+{
+    uint64_t top, middle, bottom, overlap;
+
+    mul64To128(a1, b, &middle, &bottom);              /* partial product a1*b */
+    mul64To128(a0, b, &top, &overlap);                /* partial product a0*b */
+    add128(top, overlap, 0, middle, &top, &middle);   /* combine the two partial products */
+    *z2Ptr = bottom;
+    *z1Ptr = middle;
+    *z0Ptr = top;
+}
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Multiplies the 128-bit value formed by concatenating `a0' and `a1' to the
+| 128-bit value formed by concatenating `b0' and `b1' to obtain a 256-bit
+| product.  The product is broken into four 64-bit pieces which are stored at
+| the locations pointed to by `z0Ptr', `z1Ptr', `z2Ptr', and `z3Ptr'.
+*----------------------------------------------------------------------------*/
+
+inline void mul128To256(
+     uint64_t a0,
+     uint64_t a1,
+     uint64_t b0,
+     uint64_t b1,
+     uint64_t *z0Ptr,
+     uint64_t *z1Ptr,
+     uint64_t *z2Ptr,
+     uint64_t *z3Ptr
+)
+{
+    uint64_t z0, z1, z2, z3;
+    uint64_t more1, more2;   /* scratch words for the cross products */
+
+    mul64To128(a1, b1, &z2, &z3);   /* partial product a1*b1 */
+    mul64To128(a1, b0, &z1, &more2);   /* partial product a1*b0 */
+    add128(z1, more2, 0, z2, &z1, &z2);
+    mul64To128(a0, b0, &z0, &more1);   /* partial product a0*b0 */
+    add128(z0, more1, 0, z1, &z0, &z1);
+    mul64To128(a0, b1, &more1, &more2);   /* partial product a0*b1 */
+    add128(more1, more2, 0, z2, &more1, &z2);
+    add128(z0, z1, 0, more1, &z0, &z1);
+    *z3Ptr = z3;
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+}
+
+
+/*----------------------------------------------------------------------------
+| Shifts the 192-bit value formed by concatenating `a0', `a1', and `a2' right
+| by 64 _plus_ the number of bits given in `count'.  The shifted result is
+| at most 128 nonzero bits; these are broken into two 64-bit pieces which are
+| stored at the locations pointed to by `z0Ptr' and `z1Ptr'.  The bits shifted
+| off form a third 64-bit result as follows:  The _last_ bit shifted off is
+| the most-significant bit of the extra result, and the other 63 bits of the
+| extra result are all zero if and only if _all_but_the_last_ bits shifted off
+| were all zero.  This extra result is stored in the location pointed to by
+| `z2Ptr'.  The value of `count' can be arbitrarily large.
+|     (This routine makes more sense if `a0', `a1', and `a2' are considered
+| to form a fixed-point value with binary point between `a1' and `a2'.  This
+| fixed-point value is shifted right by the number of bits given in `count',
+| and the integer part of the result is returned at the locations pointed to
+| by `z0Ptr' and `z1Ptr'.  The fractional part of the result may be slightly
+| corrupted as described above, and is returned at the location pointed to by
+| `z2Ptr'.)
+*----------------------------------------------------------------------------*/
+
+inline void shift128ExtraRightJamming(
+     uint64_t a0,
+     uint64_t a1,
+     uint64_t a2,
+     int count,
+     uint64_t *z0Ptr,
+     uint64_t *z1Ptr,
+     uint64_t *z2Ptr
+)
+{
+    uint64_t z0, z1, z2;
+    int negCount = (-count) & 63;   /* 64 - count for 0 < count < 64; 128 - count for 64 < count < 128 */
+
+    if (count == 0) {   /* nothing moves; a2 is passed through unchanged */
+        z2 = a2;
+        z1 = a1;
+        z0 = a0;
+    }
+    else {
+        if (count < 64) {
+            z2 = a1<<negCount;   /* bits leaving a1 become the top of the extra word */
+            z1 = (a0<<negCount) | (a1>>count);
+            z0 = a0>>count;
+        }
+        else {
+            if (count == 64) {   /* whole-word move */
+                z2 = a1;
+                z1 = a0;
+            }
+            else {
+                a2 |= a1;   /* from here on a1 contributes only through the sticky bit set at the end */
+                if (count < 128) {
+                    z2 = a0<<negCount;
+                    z1 = a0>>(count & 63);
+                }
+                else {
+                    z2 = (count == 128) ? a0 : (a0 != 0);   /* beyond 128 only "a0 was nonzero" survives */
+                    z1 = 0;
+                }
+            }
+            z0 = 0;
+        }
+        z2 |= (a2 != 0);   /* jam: a nonzero (possibly augmented) a2 sets bit 0 of the extra word */
+    }
+    *z2Ptr = z2;
+    *z1Ptr = z1;
+    *z0Ptr = z0;
+}
+
+#endif  /* FLOAT128 */
+
+END_SOFTFLOAT_NS
+
+#endif
diff -r 372d3611c693 -r e89a7cf22da3 ext/softfloat/softfloat-muladd.cc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ext/softfloat/softfloat-muladd.cc	Thu Sep 19 18:09:15 2013 +0200
@@ -0,0 +1,562 @@
+/*============================================================================
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * This code is based on QEMU patch by Peter Maydell
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#include "softfloat.hh"
+#include "softfloat-round-pack.hh"
+
+/*----------------------------------------------------------------------------
+| Primitive arithmetic functions, including multi-word arithmetic, and
+| division and square root approximations. (Can be specialized to target
+| if desired).
+*----------------------------------------------------------------------------*/
+#include "softfloat-macros.hh"
+
+/*----------------------------------------------------------------------------
+| Functions and definitions to determine:  (1) whether tininess for underflow
+| is detected before or after rounding by default, (2) what (if anything)
+| happens when exceptions are raised, (3) how signaling NaNs are distinguished
+| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
+| are propagated from function inputs to output.  These details are target-
+| specific.
+*----------------------------------------------------------------------------*/
+#include "softfloat-specialize.hh"
+
+/*----------------------------------------------------------------------------
+| Takes three single-precision floating-point values `a', `b' and `c', one of
+| which is a NaN, and returns the appropriate NaN result.  If any of  `a',
+| `b' or `c' is a signaling NaN, the invalid exception is raised.
+| The operands are examined in the order `a', `b', `c': the first one that
+| is a NaN is returned with its quiet bit set.  (This routine takes no
+| `infzero' input; 0*inf products are not given any special treatment here.)
+*----------------------------------------------------------------------------*/
+
+BEGIN_SOFTFLOAT_NS
+
+static float32 propagateFloat32MulAddNaN(float32 a, float32 b, float32 c, float_status_t &status)
+{
+    /* Classify the operands first; the values themselves are only
+     * modified (quiet bit set) at the point where one is returned. */
+    const int aNaN = float32_is_nan(a);
+    const int bNaN = float32_is_nan(b);
+    const int aSignals = float32_is_signaling_nan(a);
+    const int bSignals = float32_is_signaling_nan(b);
+    const int cSignals = float32_is_signaling_nan(c);
+    /* A signaling NaN in any position raises the invalid exception. */
+    if (aSignals | bSignals | cSignals) {
+        float_raise(status, float_flag_invalid);
+    }
+
+    /* The first NaN in operand order wins (float_first_operand_nan mode);
+     * `c' is the fallback.  Bit 0x00400000 is set in whatever is returned. */
+    if (aSignals | aNaN) {
+        return a | 0x00400000;
+    }
+    if (bSignals | bNaN) {
+        return b | 0x00400000;
+    }
+    return c | 0x00400000;
+}
+ 
+/*----------------------------------------------------------------------------
+| Takes three double-precision floating-point values `a', `b' and `c', one of
+| which is a NaN, and returns the appropriate NaN result.  If any of  `a',
+| `b' or `c' is a signaling NaN, the invalid exception is raised.
+| The operands are examined in the order `a', `b', `c': the first one that
+| is a NaN is returned with its quiet bit set.  (This routine takes no
+| `infzero' input; 0*inf products are not given any special treatment here.)
+*----------------------------------------------------------------------------*/
+
+static float64 propagateFloat64MulAddNaN(float64 a, float64 b, float64 c, float_status_t &status)
+{
+    /* Classify the operands first; the values themselves are only
+     * modified (quiet bit set) at the point where one is returned. */
+    const int aNaN = float64_is_nan(a);
+    const int bNaN = float64_is_nan(b);
+    const int aSignals = float64_is_signaling_nan(a);
+    const int bSignals = float64_is_signaling_nan(b);
+    const int cSignals = float64_is_signaling_nan(c);
+    /* A signaling NaN in any position raises the invalid exception. */
+    if (aSignals | bSignals | cSignals) {
+        float_raise(status, float_flag_invalid);
+    }
+
+    /* The first NaN in operand order wins (float_first_operand_nan mode);
+     * `c' is the fallback.  Bit 51 is set in whatever is returned. */
+    if (aSignals | aNaN) {
+        return a | UINT64_C(0x0008000000000000);
+    }
+    if (bSignals | bNaN) {
+        return b | UINT64_C(0x0008000000000000);
+    }
+    return c | UINT64_C(0x0008000000000000);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the single-precision floating-point values
+| `a' and `b' then adding 'c', with no intermediate rounding step after the
+| multiplication.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic 754-2008.
+| The flags argument allows the caller to select negation of the
+| addend, the intermediate product, or the final result. (The difference
+| between this and having the caller do a separate negation is that negating
+| externally will flip the sign bit on NaNs.)
+*----------------------------------------------------------------------------*/
+
+float32 float32_muladd(float32 a, float32 b, float32 c, int flags, float_status_t &status)
+{   /* of `flags', only float_muladd_negate_c and float_muladd_negate_product are tested below */
+    int aSign, bSign, cSign, zSign;
+    int16_t aExp, bExp, cExp, pExp, zExp;   /* p* describes the product a*b, z* the final result */
+    uint32_t aSig, bSig, cSig;
+    int pInf, pZero, pSign;
+    uint64_t pSig64, cSig64, zSig64;   /* 64-bit significands, leading bit kept at position 62 */
+    uint32_t pSig;
+    int shiftcount;
+
+    aSig = extractFloat32Frac(a);
+    aExp = extractFloat32Exp(a);
+    aSign = extractFloat32Sign(a);
+    bSig = extractFloat32Frac(b);
+    bExp = extractFloat32Exp(b);
+    bSign = extractFloat32Sign(b);
+    cSig = extractFloat32Frac(c);
+    cExp = extractFloat32Exp(c);
+    cSign = extractFloat32Sign(c);
+
+    /* A NaN operand takes priority over every other special case,
+     * including (0,inf,qnan) and (inf,0,qnan): it is propagated by
+     * propagateFloat32MulAddNaN(), which raises the invalid flag only
+     * when one of the operands is a signaling NaN.
+     */
+    if (((aExp == 0xff) && aSig) ||
+        ((bExp == 0xff) && bSig) ||
+        ((cExp == 0xff) && cSig)) {
+        return propagateFloat32MulAddNaN(a, b, c, status);
+    }
+
+    if (get_denormals_are_zeros(status)) {   /* denormal inputs are treated as zeros from here on */
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+        if (cExp == 0) cSig = 0;
+    }
+
+    int infzero = ((aExp == 0 && aSig == 0 && bExp == 0xff && bSig == 0) ||
+                   (aExp == 0xff && aSig == 0 && bExp == 0 && bSig == 0));   /* product is 0*inf or inf*0 */
+
+    if (infzero) {
+        float_raise(status, float_flag_invalid);
+        return float32_default_nan;
+    }
+
+    if (flags & float_muladd_negate_c) {
+        cSign ^= 1;
+    }
+
+    /* Work out the sign and type of the product */
+    pSign = aSign ^ bSign;
+    if (flags & float_muladd_negate_product) {
+        pSign ^= 1;
+    }
+    pInf = (aExp == 0xff) || (bExp == 0xff);   /* NaNs were returned above, so 0xff means infinity */
+    pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
+
+    if (cExp == 0xff) {
+        if (pInf && (pSign ^ cSign)) {
+            /* addition of opposite-signed infinities => InvalidOperation */
+            float_raise(status, float_flag_invalid);
+            return float32_default_nan;
+        }
+        /* Otherwise generate an infinity of the same sign */
+        if ((aSig && aExp == 0) || (bSig && bExp == 0)) {   /* a denormal multiplicand still sets the denormal flag */
+            float_raise(status, float_flag_denormal);
+        }
+        return packFloat32(cSign, 0xff, 0);
+    }
+
+    if (pInf) {   /* infinite product with finite c: the result is the product's infinity */
+        if ((aSig && aExp == 0) || (bSig && bExp == 0) || (cSig && cExp == 0)) {
+            float_raise(status, float_flag_denormal);
+        }
+        return packFloat32(pSign, 0xff, 0);
+    }
+
+    if (pZero) {
+        if (cExp == 0) {
+            if (cSig == 0) {
+                /* Adding two exact zeroes */
+                if (pSign == cSign) {
+                    zSign = pSign;
+                } else if (get_float_rounding_mode(status) == float_round_down) {
+                    zSign = 1;   /* zeros of opposite sign give -0 only when rounding down */
+                } else {
+                    zSign = 0;
+                }
+                return packFloat32(zSign, 0, 0);
+            }
+            /* Exact zero plus a denormal */
+            float_raise(status, float_flag_denormal);
+            if (get_flush_underflow_to_zero(status)) {
+                float_raise(status, float_flag_underflow | float_flag_inexact);
+                return packFloat32(cSign, 0, 0);
+            }
+        }
+        /* Zero plus something non-zero */
+        return packFloat32(cSign, cExp, cSig);   /* c with its (possibly flag-negated) sign */
+    }
+
+    if (aExp == 0) {
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(aSig, &aExp, &aSig);
+    }
+    if (bExp == 0) {
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(bSig, &bExp, &bSig);
+    }
+
+    /* Calculate the actual result a * b + c */
+
+    /* Multiply first; this is easy. */
+    /* NB: we subtract 0x7e where float32_mul() subtracts 0x7f
+     * because we want the true exponent, not the "one-less-than"
+     * flavour that roundAndPackFloat32() takes.
+     */
+    pExp = aExp + bExp - 0x7e;
+    aSig = (aSig | 0x00800000) << 7;   /* implicit bit added, leading bit moved to position 30 */
+    bSig = (bSig | 0x00800000) << 8;   /* implicit bit added, leading bit moved to position 31 */
+    pSig64 = (uint64_t)aSig * bSig;
+    if ((int64_t)(pSig64 << 1) >= 0) {   /* bit 62 is clear: shift the leading bit up into it */
+        pSig64 <<= 1;
+        pExp--;
+    }
+
+    zSign = pSign;
+
+    /* Now pSig64 is the significand of the multiply, with the explicit bit in
+     * position 62.
+     */
+    if (cExp == 0) {
+        if (!cSig) {
+            /* Throw out the special case of c being an exact zero now */
+            pSig = (uint32_t) shift64RightJamming(pSig64, 32);
+            return roundAndPackFloat32(zSign, pExp - 1, pSig, status);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(cSig, &cExp, &cSig);
+    }
+
+    cSig64 = (uint64_t)cSig << 39;   /* 23 fraction bits land in positions 39..61 */
+    cSig64 |= UINT64_C(0x4000000000000000);   /* explicit leading bit at position 62, as for pSig64 */
+    int expDiff = pExp - cExp;
+
+    if (pSign == cSign) {
+        /* Addition */
+        if (expDiff > 0) {
+            /* scale c to match p */
+            cSig64 = shift64RightJamming(cSig64, expDiff);
+            zExp = pExp;
+        } else if (expDiff < 0) {
+            /* scale p to match c */
+            pSig64 = shift64RightJamming(pSig64, -expDiff);
+            zExp = cExp;
+        } else {
+            /* no scaling needed */
+            zExp = cExp;
+        }
+        /* Add significands and make sure explicit bit ends up in posn 62 */
+        zSig64 = pSig64 + cSig64;
+        if ((int64_t)zSig64 < 0) {   /* the sum carried into bit 63 */
+            zSig64 = shift64RightJamming(zSig64, 1);
+        } else {
+            zExp--;
+        }
+        zSig64 = shift64RightJamming(zSig64, 32);   /* reduce to 32 bits, keeping a sticky bit */
+        return roundAndPackFloat32(zSign, zExp, zSig64, status);
+    } else {
+        /* Subtraction */
+        if (expDiff > 0) {
+            cSig64 = shift64RightJamming(cSig64, expDiff);
+            zSig64 = pSig64 - cSig64;
+            zExp = pExp;
+        } else if (expDiff < 0) {
+            pSig64 = shift64RightJamming(pSig64, -expDiff);
+            zSig64 = cSig64 - pSig64;
+            zExp = cExp;
+            zSign ^= 1;   /* c is the larger operand, so the result takes its sign */
+        } else {
+            zExp = pExp;
+            if (cSig64 < pSig64) {
+                zSig64 = pSig64 - cSig64;
+            } else if (pSig64 < cSig64) {
+                zSig64 = cSig64 - pSig64;
+                zSign ^= 1;
+            } else {
+                /* Exact zero */
+                return packFloat32(get_float_rounding_mode(status) == float_round_down, 0, 0);
+            }
+        }
+        --zExp;
+        /* Do the equivalent of normalizeRoundAndPackFloat32() but
+         * starting with the significand in a uint64_t.
+         */
+        shiftcount = countLeadingZeros64(zSig64) - 1;   /* distance needed to bring the leading 1 back to bit 62 */
+        zSig64 <<= shiftcount;
+        zExp -= shiftcount;
+        zSig64 = shift64RightJamming(zSig64, 32);
+        return roundAndPackFloat32(zSign, zExp, zSig64, status);
+    }
+}
+ 
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the double-precision floating-point values
+| `a' and `b' then adding 'c', with no intermediate rounding step after the
+| multiplication.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic 754-2008.
+| The flags argument allows the caller to select negation of the
+| addend, the intermediate product, or the final result. (The difference
+| between this and having the caller do a separate negation is that negating
+| externally will flip the sign bit on NaNs.)
+*----------------------------------------------------------------------------*/
+
+float64 float64_muladd(float64 a, float64 b, float64 c, int flags, float_status_t &status)
+{   /* of `flags', only float_muladd_negate_c and float_muladd_negate_product are tested below */
+    int aSign, bSign, cSign, zSign;
+    int16_t aExp, bExp, cExp, pExp, zExp;   /* p* describes the product a*b, z* the final result */
+    uint64_t aSig, bSig, cSig;
+    int pInf, pZero, pSign;
+    uint64_t pSig0, pSig1, cSig0, cSig1, zSig0, zSig1;   /* 128-bit significands as high:low word pairs */
+    int shiftcount;
+
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    aSign = extractFloat64Sign(a);
+    bSig = extractFloat64Frac(b);
+    bExp = extractFloat64Exp(b);
+    bSign = extractFloat64Sign(b);
+    cSig = extractFloat64Frac(c);
+    cExp = extractFloat64Exp(c);
+    cSign = extractFloat64Sign(c);
+
+    /* A NaN operand takes priority over every other special case,
+     * including (0,inf,qnan) and (inf,0,qnan): it is propagated by
+     * propagateFloat64MulAddNaN(), which raises the invalid flag only
+     * when one of the operands is a signaling NaN.
+     */
+    if (((aExp == 0x7ff) && aSig) ||
+        ((bExp == 0x7ff) && bSig) ||
+        ((cExp == 0x7ff) && cSig)) {
+        return propagateFloat64MulAddNaN(a, b, c, status);
+    }
+
+    if (get_denormals_are_zeros(status)) {   /* denormal inputs are treated as zeros from here on */
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+        if (cExp == 0) cSig = 0;
+    }
+
+    int infzero = ((aExp == 0 && aSig == 0 && bExp == 0x7ff && bSig == 0) ||
+                   (aExp == 0x7ff && aSig == 0 && bExp == 0 && bSig == 0));   /* product is 0*inf or inf*0 */
+
+    if (infzero) {
+        float_raise(status, float_flag_invalid);
+        return float64_default_nan;
+    }
+
+    if (flags & float_muladd_negate_c) {
+        cSign ^= 1;
+    }
+
+    /* Work out the sign and type of the product */
+    pSign = aSign ^ bSign;
+    if (flags & float_muladd_negate_product) {
+        pSign ^= 1;
+    }
+    pInf = (aExp == 0x7ff) || (bExp == 0x7ff);   /* NaNs were returned above, so 0x7ff means infinity */
+    pZero = ((aExp | aSig) == 0) || ((bExp | bSig) == 0);
+
+    if (cExp == 0x7ff) {
+        if (pInf && (pSign ^ cSign)) {
+            /* addition of opposite-signed infinities => InvalidOperation */
+            float_raise(status, float_flag_invalid);
+            return float64_default_nan;
+        }
+        /* Otherwise generate an infinity of the same sign */
+        if ((aSig && aExp == 0) || (bSig && bExp == 0)) {   /* a denormal multiplicand still sets the denormal flag */
+            float_raise(status, float_flag_denormal);
+        }
+        return packFloat64(cSign, 0x7ff, 0);
+    }
+
+    if (pInf) {   /* infinite product with finite c: the result is the product's infinity */
+        if ((aSig && aExp == 0) || (bSig && bExp == 0) || (cSig && cExp == 0)) {
+            float_raise(status, float_flag_denormal);
+        }
+        return packFloat64(pSign, 0x7ff, 0);
+    }
+
+    if (pZero) {
+        if (cExp == 0) {
+            if (cSig == 0) {
+                /* Adding two exact zeroes */
+                if (pSign == cSign) {
+                    zSign = pSign;
+                } else if (get_float_rounding_mode(status) == float_round_down) {
+                    zSign = 1;   /* zeros of opposite sign give -0 only when rounding down */
+                } else {
+                    zSign = 0;
+                }
+                return packFloat64(zSign, 0, 0);
+            }
+            /* Exact zero plus a denormal */
+            float_raise(status, float_flag_denormal);
+            if (get_flush_underflow_to_zero(status)) {
+                float_raise(status, float_flag_underflow | float_flag_inexact);
+                return packFloat64(cSign, 0, 0);
+            }
+        }
+        /* Zero plus something non-zero */
+        return packFloat64(cSign, cExp, cSig);   /* c with its (possibly flag-negated) sign */
+    }
+
+    if (aExp == 0) {
+        float_raise(status, float_flag_denormal);
+        normalizeFloat64Subnormal(aSig, &aExp, &aSig);
+    }
+    if (bExp == 0) {
+        float_raise(status, float_flag_denormal);
+        normalizeFloat64Subnormal(bSig, &bExp, &bSig);
+    }
+
+    /* Calculate the actual result a * b + c */
+
+    /* Multiply first; this is easy. */
+    /* NB: we subtract 0x3fe where float64_mul() subtracts 0x3ff
+     * because we want the true exponent, not the "one-less-than"
+     * flavour that roundAndPackFloat64() takes.
+     */
+    pExp = aExp + bExp - 0x3fe;
+    aSig = (aSig | UINT64_C(0x0010000000000000))<<10;   /* implicit bit added, leading bit moved to position 62 */
+    bSig = (bSig | UINT64_C(0x0010000000000000))<<11;   /* implicit bit added, leading bit moved to position 63 */
+    mul64To128(aSig, bSig, &pSig0, &pSig1);
+    if ((int64_t)(pSig0 << 1) >= 0) {   /* bit 62 of the high word is clear: shift the leading bit up into it */
+        shortShift128Left(pSig0, pSig1, 1, &pSig0, &pSig1);
+        pExp--;
+    }
+
+    zSign = pSign;
+
+    /* Now [pSig0:pSig1] is the significand of the multiply, with the explicit
+     * bit in position 126.
+     */
+    if (cExp == 0) {
+        if (!cSig) {
+            /* Throw out the special case of c being an exact zero now */
+            shift128RightJamming(pSig0, pSig1, 64, &pSig0, &pSig1);
+            return roundAndPackFloat64(zSign, pExp - 1, pSig1, status);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloat64Subnormal(cSig, &cExp, &cSig);
+    }
+
+    cSig0 = cSig << 10;   /* 52 fraction bits land in positions 10..61 of the high word */
+    cSig1 = 0;
+    cSig0 |= UINT64_C(0x4000000000000000);   /* explicit leading bit at position 126, as for the product */
+    int expDiff = pExp - cExp;
+
+    if (pSign == cSign) {
+        /* Addition */
+        if (expDiff > 0) {
+            /* scale c to match p */
+            shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
+            zExp = pExp;
+        } else if (expDiff < 0) {
+            /* scale p to match c */
+            shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
+            zExp = cExp;
+        } else {
+            /* no scaling needed */
+            zExp = cExp;
+        }
+        /* Add significands and make sure explicit bit ends up in posn 126 */
+        add128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
+        if ((int64_t)zSig0 < 0) {   /* the sum carried into bit 127 */
+            shift128RightJamming(zSig0, zSig1, 1, &zSig0, &zSig1);
+        } else {
+            zExp--;
+        }
+        shift128RightJamming(zSig0, zSig1, 64, &zSig0, &zSig1);   /* reduce to 64 bits, keeping a sticky bit */
+        return roundAndPackFloat64(zSign, zExp, zSig1, status);
+    } else {
+        /* Subtraction */
+        if (expDiff > 0) {
+            shift128RightJamming(cSig0, cSig1, expDiff, &cSig0, &cSig1);
+            sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
+            zExp = pExp;
+        } else if (expDiff < 0) {
+            shift128RightJamming(pSig0, pSig1, -expDiff, &pSig0, &pSig1);
+            sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
+            zExp = cExp;
+            zSign ^= 1;   /* c is the larger operand, so the result takes its sign */
+        } else {
+            zExp = pExp;
+            if (lt128(cSig0, cSig1, pSig0, pSig1)) {
+                sub128(pSig0, pSig1, cSig0, cSig1, &zSig0, &zSig1);
+            } else if (lt128(pSig0, pSig1, cSig0, cSig1)) {
+                sub128(cSig0, cSig1, pSig0, pSig1, &zSig0, &zSig1);
+                zSign ^= 1;
+            } else {
+                /* Exact zero */
+                return packFloat64(get_float_rounding_mode(status) == float_round_down, 0, 0);
+            }
+        }
+        --zExp;
+        /* Do the equivalent of normalizeRoundAndPackFloat64() but
+         * starting with the significand in a pair of uint64_t.
+         */
+        if (zSig0) {
+            shiftcount = countLeadingZeros64(zSig0) - 1;   /* distance needed to bring the leading 1 back to bit 62 */
+            shortShift128Left(zSig0, zSig1, shiftcount, &zSig0, &zSig1);
+            if (zSig1) {
+                zSig0 |= 1;   /* whatever remains in the low word becomes the sticky bit */
+            }
+            zExp -= shiftcount;
+        } else {
+            shiftcount = countLeadingZeros64(zSig1) - 1;   /* -1 when bit 63 of zSig1 is set */
+            zSig0 = (shiftcount < 0) ? ((zSig1 >> 1) | (zSig1 & 1)) : (zSig1 << shiftcount);   /* never shift by a negative count: move right by one, keeping a sticky bit */
+            zExp -= (shiftcount + 64);
+        }
+        return roundAndPackFloat64(zSign, zExp, zSig0, status);
+    }
+}
+
+END_SOFTFLOAT_NS
diff -r 372d3611c693 -r e89a7cf22da3 ext/softfloat/softfloat-round-pack.hh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ext/softfloat/softfloat-round-pack.hh	Thu Sep 19 18:09:15 2013 +0200
@@ -0,0 +1,301 @@
+/*============================================================================
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#ifndef _SOFTFLOAT_ROUND_PACK_H_
+#define _SOFTFLOAT_ROUND_PACK_H_
+
+#include "softfloat.hh"
+
+BEGIN_SOFTFLOAT_NS
+
+/*----------------------------------------------------------------------------
+| Takes a 64-bit fixed-point value `absZ' with binary point between bits 6
+| and 7, and returns the properly rounded 32-bit integer corresponding to the
+| input.  If `zSign' is 1, the input is negated before being converted to an
+| integer.  Bit 63 of `absZ' must be zero.  Ordinarily, the fixed-point input
+| is simply rounded to an integer, with the inexact exception raised if the
+| input cannot be represented exactly as an integer.  However, if the fixed-
+| point input is too large, the invalid exception is raised and the integer
+| indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+int32_t roundAndPackInt32(int zSign, uint64_t absZ, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Takes the 128-bit fixed-point value formed by concatenating `absZ0' and
+| `absZ1', with binary point between bits 63 and 64 (between the input words),
+| and returns the properly rounded 64-bit integer corresponding to the input.
+| If `zSign' is 1, the input is negated before being converted to an integer.
+| Ordinarily, the fixed-point input is simply rounded to an integer, with
+| the inexact exception raised if the input cannot be represented exactly as
+| an integer.  However, if the fixed-point input is too large, the invalid
+| exception is raised and the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+int64_t roundAndPackInt64(int zSign, uint64_t absZ0, uint64_t absZ1, float_status_t &status);
+
+#ifdef FLOAT16
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal half-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat16Subnormal(uint16_t aSig, int16_t *zExpPtr, uint16_t *zSigPtr);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper half-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the half-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal half-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 14
+| and 13, which is 4 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float16 roundAndPackFloat16(int zSign, int16_t zExp, uint16_t zSig, float_status_t &status);
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal single-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat32Subnormal(uint32_t aSig, int16_t *zExpPtr, uint32_t *zSigPtr);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the single-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal single-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 30
+| and 29, which is 7 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 roundAndPackFloat32(int zSign, int16_t zExp, uint32_t zSig, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
+| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+float32 normalizeRoundAndPackFloat32(int zSign, int16_t zExp, uint32_t zSig, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal double-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat64Subnormal(uint64_t aSig, int16_t *zExpPtr, uint64_t *zSigPtr);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the double-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded
+| to a subnormal number, and the underflow and inexact exceptions are raised
+| if the abstract input cannot be represented exactly as a subnormal double-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 62
+| and 61, which is 10 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 roundAndPackFloat64(int zSign, int16_t zExp, uint64_t zSig, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
+| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+float64 normalizeRoundAndPackFloat64(int zSign, int16_t zExp, uint64_t zSig, float_status_t &status);
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal extended double-precision floating-point value
+| represented by the denormalized significand `aSig'.  The normalized exponent
+| and significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| rounded and packed into the extended double-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal extended
+| double-precision floating-point number.
+|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
+| number of bits as single or double precision, respectively.  Otherwise, the
+| result is rounded to the full precision of the extended double-precision
+| format.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  The
+| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 roundAndPackFloatx80(int roundingPrecision,
+        int zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent
+| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloatx80' except that the input significand does not have to be
+| normalized.
+*----------------------------------------------------------------------------*/
+
+floatx80 normalizeRoundAndPackFloatx80(int roundingPrecision,
+        int zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status_t &status);
+
+#endif // FLOATX80
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal quadruple-precision floating-point value
+| represented by the denormalized significand formed by the concatenation of
+| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
+| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
+| significand are stored at the location pointed to by `zSig0Ptr', and the
+| least significant 64 bits of the normalized significand are stored at the
+| location pointed to by `zSig1Ptr'.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat128Subnormal(
+     uint64_t aSig0, uint64_t aSig1, int32_t *zExpPtr, uint64_t *zSig0Ptr, uint64_t *zSig1Ptr);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0', `zSig1',
+| and `zSig2', and returns the proper quadruple-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| simply rounded and packed into the quadruple-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal quadruple-
+| precision floating-point number.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  In the
+| usual case that the input significand is normalized, `zExp' must be 1 less
+| than the ``true'' floating-point exponent.  The handling of underflow and
+| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 roundAndPackFloat128(
+     int zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, uint64_t zSig2, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand formed by the concatenation of `zSig0' and `zSig1', and
+| returns the proper quadruple-precision floating-point value corresponding
+| to the abstract input.  This routine is just like `roundAndPackFloat128'
+| except that the input significand has fewer bits and does not have to be
+| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
+| point exponent.
+*----------------------------------------------------------------------------*/
+
+float128 normalizeRoundAndPackFloat128(
+     int zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status_t &status);
+
+#endif // FLOAT128
+
+END_SOFTFLOAT_NS
+
+#endif
diff -r 372d3611c693 -r e89a7cf22da3 ext/softfloat/softfloat-round-pack.cc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ext/softfloat/softfloat-round-pack.cc	Thu Sep 19 18:09:15 2013 +0200
@@ -0,0 +1,854 @@
+/*============================================================================
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+#define FLOAT128
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#include "softfloat.hh"
+#include "softfloat-round-pack.hh"
+
+/*----------------------------------------------------------------------------
+| Primitive arithmetic functions, including multi-word arithmetic, and
+| division and square root approximations. (Can be specialized to target
+| if desired).
+*----------------------------------------------------------------------------*/
+#include "softfloat-macros.hh"
+
+/*----------------------------------------------------------------------------
+| Functions and definitions to determine:  (1) whether tininess for underflow
+| is detected before or after rounding by default, (2) what (if anything)
+| happens when exceptions are raised, (3) how signaling NaNs are distinguished
+| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
+| are propagated from function inputs to output.  These details are target-
+| specific.
+*----------------------------------------------------------------------------*/
+#include "softfloat-specialize.hh"
+
+BEGIN_SOFTFLOAT_NS
+
+/*----------------------------------------------------------------------------
+| Rounds the 64-bit fixed-point magnitude `exactAbsZ' (binary point between
+| bits 6 and 7, i.e. 7 fraction bits) to a 32-bit integer according to the
+| rounding mode held in `status', negating the result if `zSign' is 1.
+| Bit 63 of `exactAbsZ' must be zero.  The inexact exception is raised when
+| any fraction bit is set; in that case the rounding-up indication is also
+| recorded if the rounded magnitude exceeds the exact one.  If the rounded
+| value does not fit in 32 bits, the invalid exception is raised and the
+| integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+int32_t roundAndPackInt32(int zSign, uint64_t exactAbsZ, float_status_t &status)
+{
+    int roundingMode = get_float_rounding_mode(status);
+    int roundNearestEven = (roundingMode == float_round_nearest_even);
+    int roundIncrement = 0x40; // half of the 7-bit fraction: the round-to-nearest bias
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) roundIncrement = 0; // truncate the magnitude
+        else {
+            roundIncrement = 0x7F; // directed mode: any fraction bit bumps the magnitude ...
+            if (zSign) {
+                if (roundingMode == float_round_up) roundIncrement = 0; // ... except round_up on a negative value
+            }
+            else {
+                if (roundingMode == float_round_down) roundIncrement = 0; // ... and round_down on a positive value
+            }
+        }
+    }
+    int roundBits = (int)(exactAbsZ & 0x7F); // the fraction bits that rounding discards
+    uint64_t absZ = (exactAbsZ + roundIncrement)>>7;
+    absZ &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven); // exact tie in nearest-even mode: clear the LSB
+    int32_t z = (int32_t) absZ;
+    if (zSign) z = (int32_t)(0U - (uint32_t) z); // negate as unsigned: `-z' is undefined for INT32_MIN (-2^31)
+    if ((absZ>>32) || (z && ((z < 0) ^ zSign))) { // magnitude wider than 32 bits, or result sign disagrees with zSign
+        float_raise(status, float_flag_invalid);
+        return (int32_t)(int32_indefinite);
+    }
+    if (roundBits) {
+        float_raise(status, float_flag_inexact);
+        if ((absZ << 7) > exactAbsZ) // rounded magnitude is larger than the exact input
+            set_float_rounding_up(status);
+    }
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Rounds the 128-bit fixed-point magnitude formed by concatenating `absZ0'
+| and `absZ1' (binary point between the two words, so `absZ1' holds the
+| fraction) to a 64-bit integer according to the rounding mode held in
+| `status', negating the result if `zSign' is 1.  The inexact exception is
+| raised when `absZ1' is nonzero; in that case the rounding-up indication is
+| also recorded if the rounded magnitude exceeds `absZ0' as passed in.  If
+| the rounded value does not fit in 64 bits, the invalid exception is raised
+| and the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+int64_t roundAndPackInt64(int zSign, uint64_t absZ0, uint64_t absZ1, float_status_t &status)
+{
+    int64_t z;
+    int roundingMode = get_float_rounding_mode(status);
+    int roundNearestEven = (roundingMode == float_round_nearest_even);
+    int increment = ((int64_t) absZ1 < 0); // top fraction bit set, i.e. fraction >= 1/2
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) increment = 0; // truncate the magnitude
+        else {
+            if (zSign) {
+                increment = (roundingMode == float_round_down) && absZ1; // negative: only round_down grows the magnitude
+            }
+            else {
+                increment = (roundingMode == float_round_up) && absZ1; // positive: only round_up grows the magnitude
+            }
+        }
+    }
+    uint64_t exactAbsZ0 = absZ0; // integer part before rounding, kept for the rounding-up test below
+    if (increment) {
+        ++absZ0;
+        if (absZ0 == 0) goto overflow; // the increment wrapped past 2^64
+        absZ0 &= ~(((uint64_t) (absZ1<<1) == 0) & roundNearestEven); // fraction exactly 1/2 in nearest-even mode: clear the LSB
+    }
+    z = absZ0;
+    if (zSign) z = (int64_t)(0 - (uint64_t) z); // negate as unsigned: `-z' is undefined for INT64_MIN (-2^63)
+    if (z && ((z < 0) ^ zSign)) { // nonzero result whose sign disagrees with zSign: out of range
+ overflow:
+        float_raise(status, float_flag_invalid);
+        return (int64_t)(int64_indefinite);
+    }
+    if (absZ1) {
+        float_raise(status, float_flag_inexact);
+        if (absZ0 > exactAbsZ0) // rounded magnitude is larger than the exact integer part
+            set_float_rounding_up(status);
+    }
+    return z;
+}
+
+#ifdef FLOAT16
+
+/*----------------------------------------------------------------------------
+| Shifts the denormalized half-precision significand `aSig' left by five
+| fewer places than its count of leading zeros (taken as a 16-bit value).
+| The shifted significand is stored through `zSigPtr' and the matching
+| exponent, one minus the shift distance, is stored through `zExpPtr'.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat16Subnormal(uint16_t aSig, int16_t *zExpPtr, uint16_t *zSigPtr)
+{
+    const int leftShift = countLeadingZeros16(aSig) - 5;
+    *zExpPtr = 1 - leftShift;
+    *zSigPtr = aSig << leftShift;
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper half-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the half-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal half-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 14
+| and 13, which is 4 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float16 roundAndPackFloat16(int zSign, int16_t zExp, uint16_t zSig, float_status_t &status)
+{
+    int16_t roundIncrement, roundBits, roundMask;
+
+    int roundingMode = get_float_rounding_mode(status);
+    int roundNearestEven = (roundingMode == float_round_nearest_even);
+    roundIncrement = 8; // half of the 4 discarded bits: the round-to-nearest bias
+    roundMask = 0xF; // the low 4 bits of zSig are rounded away
+
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) roundIncrement = 0; // truncate the magnitude
+        else {
+            roundIncrement = roundMask; // directed mode: any discarded bit bumps the magnitude ...
+            if (zSign) {
+                if (roundingMode == float_round_up) roundIncrement = 0; // ... except round_up on a negative value
+            }
+            else {
+                if (roundingMode == float_round_down) roundIncrement = 0; // ... and round_down on a positive value
+            }
+        }
+    }
+    roundBits = zSig & roundMask;
+    if (0x1D <= (uint16_t) zExp) { // unsigned compare: true for zExp >= 0x1D and for negative zExp
+        if ((0x1D < zExp)
+             || ((zExp == 0x1D) && ((int16_t) (zSig + roundIncrement) < 0))) // rounding carries into bit 15
+        {
+            float_raise(status, float_flag_overflow);
+            if (roundBits || float_exception_masked(status, float_flag_overflow)) {
+                float_raise(status, float_flag_inexact);
+            }
+            return packFloat16(zSign, 0x1F, 0) - (roundIncrement == 0); // one below the exp=0x1F/sig=0 encoding when the mode adds no increment
+        }
+        if (zExp < 0) {
+            int isTiny = (zExp < -1) || (zSig + roundIncrement < 0x8000); // judged before the denormalizing shift
+            zSig = shift16RightJamming(zSig, -zExp);
+            zExp = 0;
+            roundBits = zSig & roundMask;
+            if (isTiny) {
+                if(get_flush_underflow_to_zero(status)) {
+                    float_raise(status, float_flag_underflow | float_flag_inexact);
+                    return packFloat16(zSign, 0, 0);
+                }
+                // signal the #P according to roundBits calculated AFTER denormalization
+                if (roundBits || !float_exception_masked(status, float_flag_underflow)) {
+                    float_raise(status, float_flag_underflow);
+                }
+            }
+        }
+    }
+    if (roundBits) float_raise(status, float_flag_inexact);
+    uint16_t zSigRound = ((zSig + roundIncrement) & ~roundMask) >> 4;
+    zSigRound &= ~(((roundBits ^ 8) == 0) & roundNearestEven); // exact tie is roundBits == 8 (0x10 cannot occur under a 0xF mask): clear the LSB
+    if (zSigRound == 0) zExp = 0;
+    return packFloat16(zSign, zExp, zSigRound);
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| Shifts the denormalized single-precision significand `aSig' left by eight
+| fewer places than its count of leading zeros (taken as a 32-bit value).
+| The shifted significand is stored through `zSigPtr' and the matching
+| exponent, one minus the shift distance, is stored through `zExpPtr'.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat32Subnormal(uint32_t aSig, int16_t *zExpPtr, uint32_t *zSigPtr)
+{
+    const int leftShift = countLeadingZeros32(aSig) - 8;
+    *zExpPtr = 1 - leftShift;
+    *zSigPtr = aSig << leftShift;
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the single-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal single-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 30
+| and 29, which is 7 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 roundAndPackFloat32(int zSign, int16_t zExp, uint32_t zSig, float_status_t &status)
+{
+    int32_t roundIncrement, roundBits;
+    const int32_t roundMask = 0x7F;  // the low 7 bits of zSig are dropped by the final >> 7
+
+    int roundingMode = get_float_rounding_mode(status);
+    int roundNearestEven = (roundingMode == float_round_nearest_even);
+    roundIncrement = 0x40;  // half of the dropped range; used for nearest-even
+
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) roundIncrement = 0;
+        else {
+            roundIncrement = roundMask;  // directed rounding: any dropped bit bumps the result ...
+            if (zSign) {
+                if (roundingMode == float_round_up) roundIncrement = 0;  // ... except zSign set while rounding up
+            }
+            else {
+                if (roundingMode == float_round_down) roundIncrement = 0;  // ... or zSign clear while rounding down
+            }
+        }
+    }
+    roundBits = zSig & roundMask;
+    if (0xFD <= (uint16_t) zExp) {  // the unsigned cast makes this true for negative zExp as well
+        if ((0xFD < zExp)
+             || ((zExp == 0xFD) && ((int32_t) (zSig + roundIncrement) < 0)))  // rounding would set bit 31
+        {
+            float_raise(status, float_flag_overflow);
+            if (roundBits || float_exception_masked(status, float_flag_overflow)) {
+                float_raise(status, float_flag_inexact);
+                if (roundIncrement != 0) set_float_rounding_up(status);
+            }
+            return packFloat32(zSign, 0xFF, 0) - (roundIncrement == 0);  // one less than the 0xFF-exponent pattern when roundIncrement is 0
+        }
+        if (zExp < 0) {
+            int isTiny = (zExp < -1) || (zSig + roundIncrement < 0x80000000);
+            if (isTiny) {
+                if (!float_exception_masked(status, float_flag_underflow)) {
+                    float_raise(status, float_flag_underflow);
+                    zExp += 192; // bias unmasked underflow
+                }
+            }
+            if (zExp < 0) {  // still negative: underflow was masked, or the value was not tiny
+                zSig = shift32RightJamming(zSig, -zExp);
+                zExp = 0;
+                roundBits = zSig & roundMask;  // recomputed from the shifted significand
+                if (isTiny) {
+                    // masked underflow
+                    if(get_flush_underflow_to_zero(status)) {
+                        float_raise(status, float_flag_underflow | float_flag_inexact);
+                        return packFloat32(zSign, 0, 0);  // flushed: only the sign is kept
+                    }
+                    if (roundBits) float_raise(status, float_flag_underflow);
+                }
+            }
+        }
+    }
+    uint32_t zSigRound = ((zSig + roundIncrement) & ~roundMask) >> 7;
+    zSigRound &= ~(((roundBits ^ 0x40) == 0) & roundNearestEven);  // exact tie under nearest-even: clear bit 0
+    if (zSigRound == 0) zExp = 0;
+    if (roundBits) {
+        float_raise(status, float_flag_inexact);
+        if ((zSigRound << 7) > zSig) set_float_rounding_up(status);  // the rounded significand exceeds the input
+    }
+    return packFloat32(zSign, zExp, zSigRound);
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper single-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat32' except that `zSig' does not have to be normalized.
+| Bit 31 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+float32 normalizeRoundAndPackFloat32(int zSign, int16_t zExp, uint32_t zSig, float_status_t &status)
+{
+    const int lshift = countLeadingZeros32(zSig) - 1;  // shift amount; the exponent drops by the same count
+    return roundAndPackFloat32(zSign, zExp - lshift, zSig << lshift, status);
+}
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal double-precision floating-point value represented
+| by the denormalized significand `aSig'.  The normalized exponent and
+| significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat64Subnormal(uint64_t aSig, int16_t *zExpPtr, uint64_t *zSigPtr)
+{
+    const int lshift = countLeadingZeros64(aSig) - 11;  // left shift applied to aSig
+    *zExpPtr = 1 - lshift;
+    *zSigPtr = aSig << lshift;
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  Ordinarily, the abstract
+| value is simply rounded and packed into the double-precision format, with
+| the inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded
+| to a subnormal number, and the underflow and inexact exceptions are raised
+| if the abstract input cannot be represented exactly as a subnormal double-
+| precision floating-point number.
+|     The input significand `zSig' has its binary point between bits 62
+| and 61, which is 10 bits to the left of the usual location.  This shifted
+| significand must be normalized or smaller.  If `zSig' is not normalized,
+| `zExp' must be 0; in that case, the result returned is a subnormal number,
+| and it must not require rounding.  In the usual case that `zSig' is
+| normalized, `zExp' must be 1 less than the ``true'' floating-point exponent.
+| The handling of underflow and overflow follows the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 roundAndPackFloat64(int zSign, int16_t zExp, uint64_t zSig, float_status_t &status)
+{
+    int16_t roundIncrement, roundBits;
+    const int16_t roundMask = 0x3FF;  // the low 10 bits of zSig are dropped by the final >> 10
+    int roundingMode = get_float_rounding_mode(status);
+    int roundNearestEven = (roundingMode == float_round_nearest_even);
+    roundIncrement = 0x200;  // half of the dropped range; used for nearest-even
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) roundIncrement = 0;
+        else {
+            roundIncrement = roundMask;  // directed rounding: any dropped bit bumps the result ...
+            if (zSign) {
+                if (roundingMode == float_round_up) roundIncrement = 0;  // ... except zSign set while rounding up
+            }
+            else {
+                if (roundingMode == float_round_down) roundIncrement = 0;  // ... or zSign clear while rounding down
+            }
+        }
+    }
+    roundBits = (int16_t)(zSig & roundMask);
+    if (0x7FD <= (uint16_t) zExp) {  // the unsigned cast makes this true for negative zExp as well
+        if ((0x7FD < zExp)
+             || ((zExp == 0x7FD)
+                  && ((int64_t) (zSig + roundIncrement) < 0)))  // rounding would set bit 63
+        {
+            float_raise(status, float_flag_overflow);
+            if (roundBits || float_exception_masked(status, float_flag_overflow)) {
+                float_raise(status, float_flag_inexact);
+                if (roundIncrement != 0) set_float_rounding_up(status);
+            }
+            return packFloat64(zSign, 0x7FF, 0) - (roundIncrement == 0);  // one less than the 0x7FF-exponent pattern when roundIncrement is 0
+        }
+        if (zExp < 0) {
+            int isTiny = (zExp < -1) || (zSig + roundIncrement < UINT64_C(0x8000000000000000));
+            if (isTiny) {
+                if (!float_exception_masked(status, float_flag_underflow)) {
+                    float_raise(status, float_flag_underflow);
+                    zExp += 1536; // bias unmasked underflow
+                }
+            }
+            if (zExp < 0) {  // still negative: underflow was masked, or the value was not tiny
+                zSig = shift64RightJamming(zSig, -zExp);
+                zExp = 0;
+                roundBits = (int16_t)(zSig & roundMask);  // recomputed from the shifted significand
+                if (isTiny) {
+                    // masked underflow
+                    if(get_flush_underflow_to_zero(status)) {
+                        float_raise(status, float_flag_underflow | float_flag_inexact);
+                        return packFloat64(zSign, 0, 0);  // flushed: only the sign is kept
+                    }
+                    if (roundBits) float_raise(status, float_flag_underflow);
+                }
+            }
+        }
+    }
+    uint64_t zSigRound = (zSig + roundIncrement)>>10;
+    zSigRound &= ~(((roundBits ^ 0x200) == 0) & roundNearestEven);  // exact tie under nearest-even: clear bit 0
+    if (zSigRound == 0) zExp = 0;
+    if (roundBits) {
+        float_raise(status, float_flag_inexact);
+        if ((zSigRound << 10) > zSig) set_float_rounding_up(status);  // the rounded significand exceeds the input
+    }
+    return packFloat64(zSign, zExp, zSigRound);
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand `zSig', and returns the proper double-precision floating-
+| point value corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloat64' except that `zSig' does not have to be normalized.
+| Bit 63 of `zSig' must be zero, and `zExp' must be 1 less than the ``true''
+| floating-point exponent.
+*----------------------------------------------------------------------------*/
+
+float64 normalizeRoundAndPackFloat64(int zSign, int16_t zExp, uint64_t zSig, float_status_t &status)
+{
+    const int lshift = countLeadingZeros64(zSig) - 1;  // shift amount; the exponent drops by the same count
+    return roundAndPackFloat64(zSign, zExp - lshift, zSig << lshift, status);
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal extended double-precision floating-point value
+| represented by the denormalized significand `aSig'.  The normalized exponent
+| and significand are stored at the locations pointed to by `zExpPtr' and
+| `zSigPtr', respectively.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloatx80Subnormal(uint64_t aSig, int32_t *zExpPtr, uint64_t *zSigPtr)
+{
+    const int lshift = countLeadingZeros64(aSig);  // left shift applied to aSig
+    *zExpPtr = 1 - lshift;
+    *zSigPtr = aSig << lshift;
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| rounded and packed into the extended double-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal extended
+| double-precision floating-point number.
+|     If `roundingPrecision' is 32 or 64, the result is rounded to the same
+| number of bits as single or double precision, respectively.  Otherwise, the
+| result is rounded to the full precision of the extended double-precision
+| format.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  The
+| handling of underflow and overflow follows the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 SoftFloatRoundAndPackFloatx80(int roundingPrecision,
+        int zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status_t &status)
+{
+    uint64_t roundIncrement, roundMask, roundBits;
+    int increment;
+    uint64_t zSigExact; /* support rounding-up response */
+
+    uint8_t roundingMode = get_float_rounding_mode(status);
+    int roundNearestEven = (roundingMode == float_round_nearest_even);
+    if (roundingPrecision == 64) {  // keep the same number of bits as double precision
+        roundIncrement = UINT64_C(0x0000000000000400);
+        roundMask = UINT64_C(0x00000000000007FF);
+    }
+    else if (roundingPrecision == 32) {  // keep the same number of bits as single precision
+        roundIncrement = UINT64_C(0x0000008000000000);
+        roundMask = UINT64_C(0x000000FFFFFFFFFF);
+    }
+    else goto precision80;  // any other value: round to the full 64-bit significand
+
+    zSig0 |= (zSig1 != 0);  // fold a nonzero low word into bit 0 as a sticky bit
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) roundIncrement = 0;
+        else {
+            roundIncrement = roundMask;  // directed rounding: any dropped bit bumps the result ...
+            if (zSign) {
+                if (roundingMode == float_round_up) roundIncrement = 0;  // ... except zSign set while rounding up
+            }
+            else {
+                if (roundingMode == float_round_down) roundIncrement = 0;  // ... or zSign clear while rounding down
+            }
+        }
+    }
+    roundBits = zSig0 & roundMask;
+    if (0x7FFD <= (uint32_t) (zExp - 1)) {  // true when zExp <= 0 or zExp >= 0x7FFE
+        if ((0x7FFE < zExp)
+             || ((zExp == 0x7FFE) && (zSig0 + roundIncrement < zSig0)))  // the rounding add would wrap past bit 63
+        {
+            goto overflow;
+        }
+        if (zExp <= 0) {
+            int isTiny = (zExp < 0) || (zSig0 <= zSig0 + roundIncrement);  // not tiny only if zExp == 0 and the add wraps
+            zSig0 = shift64RightJamming(zSig0, 1 - zExp);
+            zSigExact = zSig0;  // pre-rounding value, compared against the rounded one below
+            zExp = 0;
+            roundBits = zSig0 & roundMask;  // recomputed from the shifted significand
+            if (isTiny) {
+                if (roundBits || (zSig0 && !float_exception_masked(status, float_flag_underflow)))
+                    float_raise(status, float_flag_underflow);
+            }
+            zSig0 += roundIncrement;
+            if ((int64_t) zSig0 < 0) zExp = 1;  // rounding set bit 63
+            roundIncrement = roundMask + 1;  // reused here as the value of the lowest kept bit
+            if (roundNearestEven && (roundBits<<1 == roundIncrement))
+                roundMask |= roundIncrement;  // exact tie: the lowest kept bit is cleared too
+            zSig0 &= ~roundMask;
+            if (roundBits) {
+                float_raise(status, float_flag_inexact);
+                if (zSig0 > zSigExact) set_float_rounding_up(status);
+            }
+            return packFloatx80(zSign, zExp, zSig0);
+        }
+    }
+    if (roundBits) float_raise(status, float_flag_inexact);
+    zSigExact = zSig0;
+    zSig0 += roundIncrement;
+    if (zSig0 < roundIncrement) {  // unsigned wraparound: the add carried out of bit 63
+        // Basically scale by shifting right and keep overflow
+        ++zExp;
+        zSig0 = UINT64_C(0x8000000000000000);
+        zSigExact >>= 1; // must scale also, or else later tests will fail
+    }
+    roundIncrement = roundMask + 1;  // reused here as the value of the lowest kept bit
+    if (roundNearestEven && (roundBits<<1 == roundIncrement))
+        roundMask |= roundIncrement;  // exact tie: the lowest kept bit is cleared too
+    zSig0 &= ~roundMask;
+    if (zSig0 > zSigExact) set_float_rounding_up(status);
+    if (zSig0 == 0) zExp = 0;
+    return packFloatx80(zSign, zExp, zSig0);
+ precision80:
+    increment = ((int64_t) zSig1 < 0);  // nearest-even default: the top bit of zSig1 is set
+    if (! roundNearestEven) {
+        if (roundingMode == float_round_to_zero) increment = 0;
+        else {
+            if (zSign) {
+                increment = (roundingMode == float_round_down) && zSig1;
+            }
+            else {
+                increment = (roundingMode == float_round_up) && zSig1;
+            }
+        }
+    }
+    if (0x7FFD <= (uint32_t) (zExp - 1)) {  // true when zExp <= 0 or zExp >= 0x7FFE
+        if ((0x7FFE < zExp)
+             || ((zExp == 0x7FFE)
+                  && (zSig0 == UINT64_C(0xFFFFFFFFFFFFFFFF))
+                  && increment))
+        {
+            roundMask = 0;  // so that ~roundMask below is all ones
+ overflow:  // also entered by goto from the reduced-precision path above
+            float_raise(status, float_flag_overflow | float_flag_inexact);
+            if ((roundingMode == float_round_to_zero)
+                 || (zSign && (roundingMode == float_round_up))
+                 || (! zSign && (roundingMode == float_round_down)))
+            {
+                return packFloatx80(zSign, 0x7FFE, ~roundMask);  // exponent 0x7FFE with every kept significand bit set
+            }
+            set_float_rounding_up(status);
+            return packFloatx80(zSign, 0x7FFF, UINT64_C(0x8000000000000000));  // exponent 0x7FFF, only bit 63 set
+        }
+        if (zExp <= 0) {
+            int isTiny = (zExp < 0) || (! increment)
+                || (zSig0 < UINT64_C(0xFFFFFFFFFFFFFFFF));
+            shift64ExtraRightJamming(zSig0, zSig1, 1 - zExp, &zSig0, &zSig1);
+            zExp = 0;
+            if (isTiny) {
+                if (zSig1 || (zSig0 && !float_exception_masked(status, float_flag_underflow)))
+                    float_raise(status, float_flag_underflow);
+            }
+            if (zSig1) float_raise(status, float_flag_inexact);
+            if (roundNearestEven) increment = ((int64_t) zSig1 < 0);  // re-derived from the shifted zSig1
+            else {
+                if (zSign) {
+                    increment = (roundingMode == float_round_down) && zSig1;
+                } else {
+                    increment = (roundingMode == float_round_up) && zSig1;
+                }
+            }
+            if (increment) {
+                zSigExact = zSig0++;
+                zSig0 &= ~(((uint64_t) (zSig1<<1) == 0) & roundNearestEven);  // exact tie under nearest-even: clear bit 0
+                if (zSig0 > zSigExact) set_float_rounding_up(status);
+                if ((int64_t) zSig0 < 0) zExp = 1;  // rounding set bit 63
+            }
+            return packFloatx80(zSign, zExp, zSig0);
+        }
+    }
+    if (zSig1) float_raise(status, float_flag_inexact);
+    if (increment) {
+        zSigExact = zSig0++;
+        if (zSig0 == 0) {  // the increment wrapped around
+            zExp++;
+            zSig0 = UINT64_C(0x8000000000000000);
+            zSigExact >>= 1;  // must scale also, or else later tests will fail
+        }
+        else {
+            zSig0 &= ~(((uint64_t) (zSig1<<1) == 0) & roundNearestEven);  // exact tie under nearest-even: clear bit 0
+        }
+        if (zSig0 > zSigExact) set_float_rounding_up(status);
+    }
+    else {
+        if (zSig0 == 0) zExp = 0;
+    }
+    return packFloatx80(zSign, zExp, zSig0);
+}
+
+floatx80 roundAndPackFloatx80(int roundingPrecision,
+        int zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status_t &status)
+{  // Wrapper: round once, then redo with a biased exponent if an unmasked underflow/overflow was flagged.
+    float_status_t round_status = status;  // snapshot of the status taken before rounding
+    floatx80 result = SoftFloatRoundAndPackFloatx80(roundingPrecision, zSign, zExp, zSig0, zSig1, status);
+
+    // bias unmasked underflow: redo from the snapshot with zExp raised by 0x6000
+    if (status.float_exception_flags & ~status.float_exception_masks & float_flag_underflow) {
+       float_raise(round_status, float_flag_underflow);
+       return SoftFloatRoundAndPackFloatx80(roundingPrecision, zSign, zExp + 0x6000, zSig0, zSig1, status = round_status);
+    }
+
+    // bias unmasked overflow: redo from the snapshot with zExp lowered by 0x6000
+    if (status.float_exception_flags & ~status.float_exception_masks & float_flag_overflow) {
+       float_raise(round_status, float_flag_overflow);
+       return SoftFloatRoundAndPackFloatx80(roundingPrecision, zSign, zExp - 0x6000, zSig0, zSig1, status = round_status);
+    }
+
+    return result;
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent
+| `zExp', and significand formed by the concatenation of `zSig0' and `zSig1',
+| and returns the proper extended double-precision floating-point value
+| corresponding to the abstract input.  This routine is just like
+| `roundAndPackFloatx80' except that the input significand does not have to be
+| normalized.
+*----------------------------------------------------------------------------*/
+
+floatx80 normalizeRoundAndPackFloatx80(int roundingPrecision,
+        int zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status_t &status)
+{
+    // An empty upper word means a whole-word (64-bit) left shift up front.
+    const bool upperEmpty = (zSig0 == 0);
+    uint64_t hi = upperEmpty ? zSig1 : zSig0;
+    uint64_t lo = upperEmpty ? 0 : zSig1;
+    const int32_t baseExp = upperEmpty ? zExp - 64 : zExp;
+    // Shift the pair left by the leading-zero count of the upper word.
+    const int lshift = countLeadingZeros64(hi);
+    shortShift128Left(hi, lo, lshift, &hi, &lo);
+    return roundAndPackFloatx80(roundingPrecision, zSign, baseExp - lshift,
+                                hi, lo, status);
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Normalizes the subnormal quadruple-precision floating-point value
+| represented by the denormalized significand formed by the concatenation of
+| `aSig0' and `aSig1'.  The normalized exponent is stored at the location
+| pointed to by `zExpPtr'.  The most significant 49 bits of the normalized
+| significand are stored at the location pointed to by `zSig0Ptr', and the
+| least significant 64 bits of the normalized significand are stored at the
+| location pointed to by `zSig1Ptr'.
+*----------------------------------------------------------------------------*/
+
+void normalizeFloat128Subnormal(
+     uint64_t aSig0, uint64_t aSig1, int32_t *zExpPtr, uint64_t *zSig0Ptr, uint64_t *zSig1Ptr)
+{
+    if (aSig0 != 0) {
+        // Upper word is populated: shift the 128-bit pair left as a unit.
+        const int lshift = countLeadingZeros64(aSig0) - 15;
+        shortShift128Left(aSig0, aSig1, lshift, zSig0Ptr, zSig1Ptr);
+        *zExpPtr = 1 - lshift;
+        return;
+    }
+    // Upper word is empty: both output words are derived from aSig1.
+    const int lshift = countLeadingZeros64(aSig1) - 15;
+    if (lshift >= 0) {
+        *zSig0Ptr = aSig1 << lshift;
+        *zSig1Ptr = 0;
+    }
+    else {
+        // Fewer than 15 leading zeros: split aSig1 across both output words.
+        *zSig0Ptr = aSig1 >> (-lshift);
+        *zSig1Ptr = aSig1 << (lshift & 63);
+    }
+    *zExpPtr = -lshift - 63;
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and extended significand formed by the concatenation of `zSig0', `zSig1',
+| and `zSig2', and returns the proper quadruple-precision floating-point value
+| corresponding to the abstract input.  Ordinarily, the abstract value is
+| simply rounded and packed into the quadruple-precision format, with the
+| inexact exception raised if the abstract input cannot be represented
+| exactly.  However, if the abstract value is too large, the overflow and
+| inexact exceptions are raised and an infinity or maximal finite value is
+| returned.  If the abstract value is too small, the input value is rounded to
+| a subnormal number, and the underflow and inexact exceptions are raised if
+| the abstract input cannot be represented exactly as a subnormal quadruple-
+| precision floating-point number.
+|     The input significand must be normalized or smaller.  If the input
+| significand is not normalized, `zExp' must be 0; in that case, the result
+| returned is a subnormal number, and it must not require rounding.  In the
+| usual case that the input significand is normalized, `zExp' must be 1 less
+| than the ``true'' floating-point exponent.  The handling of underflow and
+| overflow follows the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 roundAndPackFloat128(
+     int zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, uint64_t zSig2, float_status_t &status)
+{  // NOTE(review): the rounding mode in status is never read here; the increment depends only on zSig2.
+    int increment = ((int64_t) zSig2 < 0);  // the top bit of zSig2 is set
+    if (0x7FFD <= (uint32_t) zExp) {  // the unsigned cast makes this true for negative zExp as well
+        if ((0x7FFD < zExp)
+             || ((zExp == 0x7FFD)
+                  && eq128(UINT64_C(0x0001FFFFFFFFFFFF),
+                         UINT64_C(0xFFFFFFFFFFFFFFFF), zSig0, zSig1)
+                  && increment))
+        {
+            float_raise(status, float_flag_overflow | float_flag_inexact);
+            return packFloat128(zSign, 0x7FFF, 0, 0);  // exponent 0x7FFF with a zero significand
+        }
+        if (zExp < 0) {
+            int isTiny = (zExp < -1)
+                || ! increment
+                || lt128(zSig0, zSig1,
+                       UINT64_C(0x0001FFFFFFFFFFFF),
+                       UINT64_C(0xFFFFFFFFFFFFFFFF));
+            shift128ExtraRightJamming(
+                zSig0, zSig1, zSig2, -zExp, &zSig0, &zSig1, &zSig2);
+            zExp = 0;
+            if (isTiny && zSig2) float_raise(status, float_flag_underflow);
+            increment = ((int64_t) zSig2 < 0);  // re-derived from the shifted zSig2
+        }
+    }
+    if (zSig2) float_raise(status, float_flag_inexact);
+    if (increment) {
+        add128(zSig0, zSig1, 0, 1, &zSig0, &zSig1);
+        zSig1 &= ~((zSig2 + zSig2 == 0) & 1);  // zSig2 was exactly its top bit (a tie): clear bit 0
+    }
+    else {
+        if ((zSig0 | zSig1) == 0) zExp = 0;
+    }
+    return packFloat128(zSign, zExp, zSig0, zSig1);
+}
+
+/*----------------------------------------------------------------------------
+| Takes an abstract floating-point value having sign `zSign', exponent `zExp',
+| and significand formed by the concatenation of `zSig0' and `zSig1', and
+| returns the proper quadruple-precision floating-point value corresponding
+| to the abstract input.  This routine is just like `roundAndPackFloat128'
+| except that the input significand has fewer bits and does not have to be
+| normalized.  In all cases, `zExp' must be 1 less than the ``true'' floating-
+| point exponent.
+*----------------------------------------------------------------------------*/
+
+float128 normalizeRoundAndPackFloat128(
+     int zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1, float_status_t &status)
+{
+    // With an empty upper word, promote the lower word (a 64-bit left shift).
+    const bool upperEmpty = (zSig0 == 0);
+    uint64_t hi = upperEmpty ? zSig1 : zSig0;
+    uint64_t lo = upperEmpty ? 0 : zSig1;
+    uint64_t extra;
+    const int32_t baseExp = upperEmpty ? zExp - 64 : zExp;
+
+    // Non-negative: shift left; negative: shift right, spilling into `extra`.
+    const int lshift = countLeadingZeros64(hi) - 15;
+    if (lshift < 0) {
+        shift128ExtraRightJamming(hi, lo, 0, -lshift, &hi, &lo, &extra);
+    }
+    else {
+        extra = 0;
+        shortShift128Left(hi, lo, lshift, &hi, &lo);
+    }
+    return roundAndPackFloat128(zSign, baseExp - lshift, hi, lo, extra,
+                                status);
+}
+
+END_SOFTFLOAT_NS
+
+#endif
diff -r 372d3611c693 -r e89a7cf22da3 ext/softfloat/softfloat-specialize.hh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ext/softfloat/softfloat-specialize.hh	Thu Sep 19 18:09:15 2013 +0200
@@ -0,0 +1,764 @@
+/*============================================================================
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+#ifndef _SOFTFLOAT_SPECIALIZE_H_
+#define _SOFTFLOAT_SPECIALIZE_H_
+
+#include "softfloat.hh"
+#include "softfloat-macros.hh"
+
+BEGIN_SOFTFLOAT_NS
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#define int16_indefinite ((int16_t)0x8000)     /* only the sign bit set */
+#define int32_indefinite ((int32_t)0x80000000) /* only the sign bit set */
+#define int64_indefinite UINT64_C(0x8000000000000000) /* NOTE(review): unsigned constant, unlike the two signed casts above -- confirm callers convert it */
+
+/*----------------------------------------------------------------------------
+| Internal canonical NaN format shared by the XxxToCommonNaN converters.
+*----------------------------------------------------------------------------*/
+
+typedef struct {
+    int sign;           /* sign of the NaN, taken from the source's top bit */
+    uint64_t hi, lo;    /* fraction bits, left-aligned in the pair hi:lo */
+} commonNaNT;
+
+#ifdef FLOAT16
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated half-precision NaN.
+*----------------------------------------------------------------------------*/
+#define float16_default_nan 0xFE00 /* sign 1, exp 0x1F, fraction bit 9 */
+
+#define float16_fraction extractFloat16Frac /* alias for the accessor below */
+#define float16_exp extractFloat16Exp       /* alias for the accessor below */
+#define float16_sign extractFloat16Sign     /* alias for the accessor below */
+
+/*----------------------------------------------------------------------------
+| Extracts the 10-bit fraction field (bits 9..0) of half-precision value `a'.
+*----------------------------------------------------------------------------*/
+
+inline uint16_t extractFloat16Frac(float16 a)
+{
+    return (uint16_t) (a & 0x03FF);
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the 5-bit exponent field (bits 14..10) of half-precision `a'.
+*----------------------------------------------------------------------------*/
+
+inline int16_t extractFloat16Exp(float16 a)
+{
+    return (int16_t) ((a >> 10) & 0x1F);
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the sign bit (bit 15) of the half-precision value `a'.
+*----------------------------------------------------------------------------*/
+
+inline int extractFloat16Sign(float16 a)
+{
+    return (int) (a >> 15);
+}
+
+/*----------------------------------------------------------------------------
+| Assembles a half-precision floating-point value from the sign `zSign', the
+| exponent `zExp', and the significand `zSig'.  The sign and exponent are
+| shifted into place and the three fields are then added, not OR-ed, so any
+| integer portion of `zSig' carries into the exponent field.  Because a
+| properly normalized significand has an integer portion equal to 1, callers
+| passing a complete, normalized significand in `zSig' must supply a `zExp'
+| that is 1 less than the desired result exponent.  `zSig' is added as is,
+| without masking.
+*----------------------------------------------------------------------------*/
+
+inline float16 packFloat16(int zSign, int zExp, uint16_t zSig)
+{
+    return zSig + (((uint16_t) zExp)<<10) + (((uint16_t) zSign)<<15);
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the half-precision value `a' is a NaN (exponent field all
+| ones with a nonzero fraction).  Returns 1 if so; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+inline int float16_is_nan(float16 a)
+{
+    return ((uint16_t) (a<<1)) > 0xF800;
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the half-precision value `a' is a signaling NaN (exponent all
+| ones, fraction bit 9 clear, lower fraction bits nonzero).  Returns 1 or 0.
+*----------------------------------------------------------------------------*/
+
+inline int float16_is_signaling_nan(float16 a)
+{
+    return ((a & 0x1FF) != 0) && (((a>>9) & 0x3F) == 0x3E);
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the half-precision value `a' is denormal (zero exponent field
+| with a nonzero fraction).  Returns 1 if so; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+inline int float16_is_denormal(float16 a)
+{
+    return (extractFloat16Frac(a) != 0) && (extractFloat16Exp(a) == 0);
+}
+
+/*----------------------------------------------------------------------------
+| Flushes a denormal float16 to zero of the same sign; others pass through.
+*----------------------------------------------------------------------------*/
+
+inline float16 float16_denormal_to_zero(float16 a)
+{
+    /* Only the sign bit survives the flush, so the zero keeps its sign. */
+    return float16_is_denormal(a) ? (float16) (a & 0x8000) : a;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the half-precision NaN `a' to the canonical NaN format, keeping
+| its sign and moving its fraction to the top of `hi'.  The invalid exception
+| is raised if `a' is a signaling NaN.
+*----------------------------------------------------------------------------*/
+
+inline commonNaNT float16ToCommonNaN(float16 a, float_status_t &status)
+{
+    if (float16_is_signaling_nan(a)) float_raise(status, float_flag_invalid);
+    commonNaNT canon;
+    canon.sign = a >> 15;
+    canon.hi = ((uint64_t) a) << 54;   /* fraction bit 9 lands in bit 63 */
+    canon.lo = 0;
+    return canon;
+}
+
+/*----------------------------------------------------------------------------
+| Encodes the canonical NaN `a' as a half-precision NaN: exponent all ones,
+| fraction bit 9 forced on, remaining bits from the top 10 bits of `a.hi'.
+*----------------------------------------------------------------------------*/
+
+inline float16 commonNaNToFloat16(commonNaNT a)
+{
+    return 0x7E00 | (((uint16_t) a.sign)<<15) | (uint16_t)(a.hi>>54);
+}
+
+#endif
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated single-precision NaN.
+*----------------------------------------------------------------------------*/
+#define float32_default_nan 0xFFC00000 /* sign 1, exp 0xFF, fraction bit 22 */
+
+#define float32_fraction extractFloat32Frac /* alias for the accessor below */
+#define float32_exp extractFloat32Exp       /* alias for the accessor below */
+#define float32_sign extractFloat32Sign     /* alias for the accessor below */
+
+/*----------------------------------------------------------------------------
+| Extracts the 23-bit fraction field (bits 22..0) of single-precision `a'.
+*----------------------------------------------------------------------------*/
+
+inline uint32_t extractFloat32Frac(float32 a)
+{
+    return (uint32_t) (a & 0x007FFFFF);
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the 8-bit exponent field (bits 30..23) of single-precision `a'.
+*----------------------------------------------------------------------------*/
+
+inline int16_t extractFloat32Exp(float32 a)
+{
+    return (int16_t) ((a >> 23) & 0xFF);
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the sign bit (bit 31) of the single-precision value `a'.
+*----------------------------------------------------------------------------*/
+
+inline int extractFloat32Sign(float32 a)
+{
+    return (int) (a >> 31);
+}
+
+/*----------------------------------------------------------------------------
+| Assembles a single-precision floating-point value from the sign `zSign',
+| the exponent `zExp', and the significand `zSig'.  The sign and exponent are
+| shifted into place and the three fields are then added, not OR-ed, so any
+| integer portion of `zSig' carries into the exponent field.  Because a
+| properly normalized significand has an integer portion equal to 1, callers
+| passing a complete, normalized significand in `zSig' must supply a `zExp'
+| that is 1 less than the desired result exponent.  `zSig' is added as is,
+| without masking.
+*----------------------------------------------------------------------------*/
+
+inline float32 packFloat32(int zSign, int16_t zExp, uint32_t zSig)
+{
+    return zSig + (((uint32_t) zExp)<<23) + (((uint32_t) zSign)<<31);
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the single-precision value `a' is a NaN (exponent field all
+| ones with a nonzero fraction).  Returns 1 if so; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+inline int float32_is_nan(float32 a)
+{
+    return ((uint32_t) (a<<1)) > 0xFF000000;
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the single-precision value `a' is a signaling NaN (exponent
+| all ones, fraction bit 22 clear, lower fraction nonzero).  Returns 1 or 0.
+*----------------------------------------------------------------------------*/
+
+inline int float32_is_signaling_nan(float32 a)
+{
+    return ((a & 0x003FFFFF) != 0) && (((a>>22) & 0x1FF) == 0x1FE);
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the single-precision value `a' is denormal (zero exponent
+| field with a nonzero fraction).  Returns 1 if so; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+inline int float32_is_denormal(float32 a)
+{
+    return (extractFloat32Frac(a) != 0) && (extractFloat32Exp(a) == 0);
+}
+
+/*----------------------------------------------------------------------------
+| Flushes a denormal float32 to zero of the same sign; others pass through.
+*----------------------------------------------------------------------------*/
+
+inline float32 float32_denormal_to_zero(float32 a)
+{
+    /* Only the sign bit survives the flush, so the zero keeps its sign. */
+    return float32_is_denormal(a) ? (float32) (a & 0x80000000) : a;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the single-precision NaN `a' to the canonical NaN format, keeping
+| its sign and moving its fraction to the top of `hi'.  The invalid exception
+| is raised if `a' is a signaling NaN.
+*----------------------------------------------------------------------------*/
+
+inline commonNaNT float32ToCommonNaN(float32 a, float_status_t &status)
+{
+    if (float32_is_signaling_nan(a)) float_raise(status, float_flag_invalid);
+    commonNaNT canon;
+    canon.sign = a >> 31;
+    canon.hi = ((uint64_t) a) << 41;   /* fraction bit 22 lands in bit 63 */
+    canon.lo = 0;
+    return canon;
+}
+
+/*----------------------------------------------------------------------------
+| Encodes the canonical NaN `a' as a single-precision NaN: exponent all ones,
+| fraction bit 22 forced on, remaining bits from the top 23 bits of `a.hi'.
+*----------------------------------------------------------------------------*/
+
+inline float32 commonNaNToFloat32(commonNaNT a)
+{
+    return 0x7FC00000 | (((uint32_t) a.sign)<<31) | (uint32_t)(a.hi>>41);
+}
+
+/*----------------------------------------------------------------------------
+| Takes two single-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either is a signaling
+| NaN, the invalid exception is raised.  Declared here, defined out of line.
+*----------------------------------------------------------------------------*/
+
+float32 propagateFloat32NaN(float32 a, float32 b, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Single-operand NaN propagation: returns the single-precision NaN `a' with
+| fraction bit 22 set, raising the invalid exception if `a' was signaling.
+*----------------------------------------------------------------------------*/
+
+inline float32 propagateFloat32NaN(float32 a, float_status_t &status)
+{
+    const bool isSignaling = float32_is_signaling_nan(a) != 0;
+    if (isSignaling) float_raise(status, float_flag_invalid);
+
+    return (float32) (a | 0x00400000);   /* quiet the NaN */
+}
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated double-precision NaN.
+*----------------------------------------------------------------------------*/
+#define float64_default_nan UINT64_C(0xFFF8000000000000) /* sign 1, exp 0x7FF, fraction bit 51 */
+
+#define float64_fraction extractFloat64Frac /* alias for the accessor below */
+#define float64_exp extractFloat64Exp       /* alias for the accessor below */
+#define float64_sign extractFloat64Sign     /* alias for the accessor below */
+
+/*----------------------------------------------------------------------------
+| Extracts the 52-bit fraction field (bits 51..0) of double-precision `a'.
+*----------------------------------------------------------------------------*/
+
+inline uint64_t extractFloat64Frac(float64 a)
+{
+    return (uint64_t) (a & UINT64_C(0x000FFFFFFFFFFFFF));
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the 11-bit exponent field (bits 62..52) of double-precision `a'.
+*----------------------------------------------------------------------------*/
+
+inline int16_t extractFloat64Exp(float64 a)
+{
+    return (int16_t) ((a >> 52) & 0x7FF);
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the sign bit (bit 63) of the double-precision value `a'.
+*----------------------------------------------------------------------------*/
+
+inline int extractFloat64Sign(float64 a)
+{
+    return (int) (a >> 63);
+}
+
+/*----------------------------------------------------------------------------
+| Assembles a double-precision floating-point value from the sign `zSign',
+| the exponent `zExp', and the significand `zSig'.  The sign and exponent are
+| shifted into place and the three fields are then added, not OR-ed, so any
+| integer portion of `zSig' carries into the exponent field.  Because a
+| properly normalized significand has an integer portion equal to 1, callers
+| passing a complete, normalized significand in `zSig' must supply a `zExp'
+| that is 1 less than the desired result exponent.  `zSig' is added as is,
+| without masking.
+*----------------------------------------------------------------------------*/
+
+inline float64 packFloat64(int zSign, int16_t zExp, uint64_t zSig)
+{
+    return zSig + (((uint64_t) zExp)<<52) + (((uint64_t) zSign)<<63);
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the double-precision value `a' is a NaN (exponent field all
+| ones with a nonzero fraction).  Returns 1 if so; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+inline int float64_is_nan(float64 a)
+{
+    return ((uint64_t) (a<<1)) > UINT64_C(0xFFE0000000000000);
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the double-precision value `a' is a signaling NaN (exponent
+| all ones, fraction bit 51 clear, lower fraction nonzero).  Returns 1 or 0.
+*----------------------------------------------------------------------------*/
+
+inline int float64_is_signaling_nan(float64 a)
+{
+    return ((a & UINT64_C(0x0007FFFFFFFFFFFF)) != 0) && (((a>>51) & 0xFFF) == 0xFFE);
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the double-precision value `a' is denormal (zero exponent
+| field with a nonzero fraction).  Returns 1 if so; otherwise returns 0.
+*----------------------------------------------------------------------------*/
+
+inline int float64_is_denormal(float64 a)
+{
+    return (extractFloat64Frac(a) != 0) && (extractFloat64Exp(a) == 0);
+}
+
+/*----------------------------------------------------------------------------
+| Flushes a denormal float64 to zero of the same sign; others pass through.
+*----------------------------------------------------------------------------*/
+
+inline float64 float64_denormal_to_zero(float64 a)
+{
+    /* Only the sign bit survives the flush, so the zero keeps its sign. */
+    return float64_is_denormal(a) ? (float64) (a & ((uint64_t)(1) << 63)) : a;
+}
+
+/*----------------------------------------------------------------------------
+| Converts the double-precision NaN `a' to the canonical NaN format, keeping
+| its sign and moving its fraction to the top of `hi'.  The invalid exception
+| is raised if `a' is a signaling NaN.
+*----------------------------------------------------------------------------*/
+
+inline commonNaNT float64ToCommonNaN(float64 a, float_status_t &status)
+{
+    if (float64_is_signaling_nan(a)) float_raise(status, float_flag_invalid);
+    commonNaNT canon;
+    canon.sign = (int) (a >> 63);
+    canon.hi = a << 12;                /* fraction bit 51 lands in bit 63 */
+    canon.lo = 0;
+    return canon;
+}
+
+/*----------------------------------------------------------------------------
+| Encodes the canonical NaN `a' as a double-precision NaN: exponent all ones,
+| fraction bit 51 forced on, remaining bits from the top 52 bits of `a.hi'.
+*----------------------------------------------------------------------------*/
+
+inline float64 commonNaNToFloat64(commonNaNT a)
+{
+    return UINT64_C(0x7FF8000000000000) | (((uint64_t) a.sign)<<63) | (a.hi>>12);
+}
+
+/*----------------------------------------------------------------------------
+| Takes two double-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either is a signaling
+| NaN, the invalid exception is raised.  Declared here, defined out of line.
+*----------------------------------------------------------------------------*/
+
+float64 propagateFloat64NaN(float64 a, float64 b, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Single-operand NaN propagation: returns the double-precision NaN `a' with
+| fraction bit 51 set, raising the invalid exception if `a' was signaling.
+*----------------------------------------------------------------------------*/
+
+inline float64 propagateFloat64NaN(float64 a, float_status_t &status)
+{
+    const bool isSignaling = float64_is_signaling_nan(a) != 0;
+    if (isSignaling) float_raise(status, float_flag_invalid);
+
+    return (float64) (a | UINT64_C(0x0008000000000000));   /* quiet the NaN */
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated extended double-precision NaN.  The
+| `_exp' and `_fraction' values hold the sign-and-exponent word and the
+| 64-bit significand, respectively.
+*----------------------------------------------------------------------------*/
+#define floatx80_default_nan_exp 0xFFFF
+#define floatx80_default_nan_fraction UINT64_C(0xC000000000000000)
+
+#define floatx80_fraction extractFloatx80Frac /* alias for the accessor below */
+#define floatx80_exp extractFloatx80Exp       /* alias for the accessor below */
+#define floatx80_sign extractFloatx80Sign     /* alias for the accessor below */
+
+#define EXP_BIAS 0x3FFF /* presumably the floatx80 exponent bias -- confirm */
+
+/*----------------------------------------------------------------------------
+| Returns the `fraction' member of the extended double-precision value `a'
+| unchanged.
+*----------------------------------------------------------------------------*/
+
+inline uint64_t extractFloatx80Frac(floatx80 a)
+{
+    return (uint64_t) a.fraction;
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the 15-bit exponent field from the `exp' word of the extended
+| double-precision value `a'; bit 15 and above are masked off.
+*----------------------------------------------------------------------------*/
+
+inline int32_t extractFloatx80Exp(floatx80 a)
+{
+    return (int32_t) (a.exp & 0x7FFF);
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the sign of the extended double-precision value `a', which is
+| stored in bit 15 of its `exp' word.
+*----------------------------------------------------------------------------*/
+
+inline int extractFloatx80Sign(floatx80 a)
+{
+    return (int) (a.exp >> 15);
+}
+
+/*----------------------------------------------------------------------------
+| Assembles an extended double-precision value: `zSig' is stored as the
+| significand, and `zSign' shifted to bit 15 plus `zExp' forms the `exp' word.
+*----------------------------------------------------------------------------*/
+
+inline floatx80 packFloatx80(int zSign, int32_t zExp, uint64_t zSig)
+{
+    floatx80 packed;
+    packed.exp = (zSign << 15) + zExp;
+    packed.fraction = zSig;
+    return packed;
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the extended double-precision value `a' is a NaN (exponent
+| all ones and any significand bit below bit 63 set).  Returns 1 or 0.
+*----------------------------------------------------------------------------*/
+
+inline int floatx80_is_nan(floatx80 a)
+{
+    return ((a.exp & 0x7FFF) == 0x7FFF) && ((int64_t) (a.fraction<<1) != 0);
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the extended double-precision value `a' is a signaling NaN
+| (exponent all ones, bit 62 clear, some bit below 62 set).  Returns 1 or 0.
+*----------------------------------------------------------------------------*/
+
+inline int floatx80_is_signaling_nan(floatx80 a)
+{
+    const uint64_t noQuietBit = a.fraction & ~UINT64_C(0x4000000000000000);
+    return ((a.exp & 0x7FFF) == 0x7FFF) && (a.fraction == noQuietBit)
+            && (((uint64_t) (noQuietBit<<1)) != 0);
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the extended double-precision value `a' has an unsupported
+| encoding (nonzero exponent field with bit 63 clear).  Returns 1 or 0.
+*----------------------------------------------------------------------------*/
+
+inline int floatx80_is_unsupported(floatx80 a)
+{
+    return (!(a.fraction & UINT64_C(0x8000000000000000)) && (a.exp & 0x7FFF));
+}
+
+/*----------------------------------------------------------------------------
+| Converts the extended double-precision NaN `a' to the canonical NaN
+| format, keeping its sign and shifting its significand left by one into
+| `hi'.  The invalid exception is raised if `a' is a signaling NaN.
+*----------------------------------------------------------------------------*/
+
+inline commonNaNT floatx80ToCommonNaN(floatx80 a, float_status_t &status)
+{
+    if (floatx80_is_signaling_nan(a)) float_raise(status, float_flag_invalid);
+    commonNaNT canon;
+    canon.sign = a.exp >> 15;
+    canon.hi = a.fraction << 1;        /* drops bit 63 of the significand */
+    canon.lo = 0;
+    return canon;
+}
+
+/*----------------------------------------------------------------------------
+| Encodes the canonical NaN `a' as an extended double-precision NaN: exponent
+| all ones, significand bits 63 and 62 forced on, the rest from `a.hi>>1'.
+*----------------------------------------------------------------------------*/
+
+inline floatx80 commonNaNToFloatx80(commonNaNT a)
+{
+    floatx80 encoded;
+    encoded.exp = 0x7FFF | (((uint16_t) a.sign)<<15);
+    encoded.fraction = (a.hi>>1) | UINT64_C(0xC000000000000000);
+    return encoded;
+}
+
+/*----------------------------------------------------------------------------
+| Takes two extended double-precision floating-point values `a' and `b', one
+| of which is a NaN, and returns the appropriate NaN result.  If either is a
+| signaling NaN, the invalid exception is raised.  Defined out of line.
+*----------------------------------------------------------------------------*/
+
+floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Single-operand NaN propagation: returns the extended double-precision NaN
+| `a' with significand bits 63 and 62 set.  The invalid exception is raised
+| if `a' was a signaling NaN.
+*----------------------------------------------------------------------------*/
+
+inline floatx80 propagateFloatx80NaN(floatx80 a, float_status_t &status)
+{
+    const bool isSignaling = floatx80_is_signaling_nan(a) != 0;
+    if (isSignaling) float_raise(status, float_flag_invalid);
+
+    /* Quiet the NaN; the signaling test above used the unmodified value. */
+    a.fraction |= UINT64_C(0xC000000000000000);
+    return a;
+}
+
+/*----------------------------------------------------------------------------
+| The default generated extended double-precision NaN (declaration only).
+*----------------------------------------------------------------------------*/
+extern const floatx80 floatx80_default_nan;
+
+#endif /* FLOATX80 */
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated quadruple-precision NaN. The `_hi' and
+| `_lo' values hold the most- and least-significant 64 bits, respectively.
+*----------------------------------------------------------------------------*/
+#define float128_default_nan_hi UINT64_C(0xFFFF800000000000)
+#define float128_default_nan_lo UINT64_C(0x0000000000000000)
+
+#define float128_exp extractFloat128Exp /* alias for the accessor below */
+
+/*----------------------------------------------------------------------------
+| Returns the `lo' word of the quadruple-precision value `a', i.e. the
+| least-significant 64 fraction bits.
+*----------------------------------------------------------------------------*/
+
+inline uint64_t extractFloat128Frac1(float128 a)
+{
+    return (uint64_t) a.lo;
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the most-significant 48 fraction bits of the quadruple-precision
+| value `a', which are the low 48 bits of its `hi' word.
+*----------------------------------------------------------------------------*/
+
+inline uint64_t extractFloat128Frac0(float128 a)
+{
+    return (uint64_t) (a.hi & UINT64_C(0x0000FFFFFFFFFFFF));
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the 15-bit exponent field of the quadruple-precision value `a'
+| (bits 62..48 of its `hi' word).
+*----------------------------------------------------------------------------*/
+
+inline int32_t extractFloat128Exp(float128 a)
+{
+    return (int32_t) ((a.hi >> 48) & 0x7FFF);
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the sign bit of quadruple-precision `a' (bit 63 of its `hi' word).
+*----------------------------------------------------------------------------*/
+
+inline int extractFloat128Sign(float128 a)
+{
+    return (int) (a.hi >> 63);
+}
+
+/*----------------------------------------------------------------------------
+| Assembles a quadruple-precision floating-point value from the sign `zSign',
+| the exponent `zExp', and the significand formed by concatenating `zSig0'
+| and `zSig1'.  `zSig1' becomes the low 64 bits unchanged.  For the high 64
+| bits, `zSign' and `zExp' are shifted into place and added to `zSig0'; the
+| fields are added, not OR-ed, so any integer portion of `zSig0' carries
+| into the exponent field.  Because a properly normalized significand has an
+| integer portion equal to 1, callers passing a complete, normalized
+| significand in `zSig0' and `zSig1' must supply a `zExp' that is 1 less
+| than the desired result exponent.  `zSig0' is added as is, without
+| masking.
+*----------------------------------------------------------------------------*/
+
+inline float128 packFloat128(int zSign, int32_t zExp, uint64_t zSig0, uint64_t zSig1)
+{
+    float128 packed;
+    packed.hi = zSig0 + (((uint64_t) zExp)<<48) + (((uint64_t) zSign)<<63);
+    packed.lo = zSig1;
+    return packed;
+}
+
+/*----------------------------------------------------------------------------
+| Builds a quadruple-precision floating-point value directly from its two
+| 64-bit halves: `zHi' becomes the `hi' word and `zLo' the `lo' word.
+*----------------------------------------------------------------------------*/
+
+inline float128 packFloat128(uint64_t zHi, uint64_t zLo)
+{
+    float128 packed;
+    packed.hi = zHi;
+    packed.lo = zLo;
+    return packed;
+}
+
+#ifdef _MSC_VER /* MSVC gets a brace initializer instead of a function call */
+#define PACK_FLOAT_128(hi,lo) { lo, hi } /* presumably matches float128 member order (lo first) -- confirm */
+#else
+#define PACK_FLOAT_128(hi,lo) packFloat128(UINT64_C(hi),UINT64_C(lo)) /* hi and lo must be bare integer literals */
+#endif
+
+/*----------------------------------------------------------------------------
+| Tests whether the quadruple-precision value `a' is a NaN (exponent field
+| all ones with a nonzero 112-bit fraction).  Returns 1 or 0.
+*----------------------------------------------------------------------------*/
+
+inline int float128_is_nan(float128 a)
+{
+    return (((uint64_t) (a.hi<<1)) >= UINT64_C(0xFFFE000000000000))
+        && ((a.hi & UINT64_C(0x0000FFFFFFFFFFFF)) || a.lo);
+}
+
+/*----------------------------------------------------------------------------
+| Tests whether the quadruple-precision value `a' is a signaling NaN (exponent
+| all ones, `hi' bit 47 clear, some lower fraction bit set).  Returns 1 or 0.
+*----------------------------------------------------------------------------*/
+
+inline int float128_is_signaling_nan(float128 a)
+{
+    return ((a.hi & UINT64_C(0x00007FFFFFFFFFFF)) || a.lo)
+        && (((a.hi>>47) & 0xFFFF) == 0xFFFE);
+}
+
+/*----------------------------------------------------------------------------
+| Converts the quadruple-precision NaN `a' to the canonical NaN format,
+| keeping its sign and shifting the 128-bit pattern left by 16 into `hi:lo'.
+| The invalid exception is raised if `a' is a signaling NaN.
+*----------------------------------------------------------------------------*/
+
+inline commonNaNT float128ToCommonNaN(float128 a, float_status_t &status)
+{
+    if (float128_is_signaling_nan(a)) float_raise(status, float_flag_invalid);
+    commonNaNT canon;
+    shortShift128Left(a.hi, a.lo, 16, &canon.hi, &canon.lo);
+    canon.sign = (int) (a.hi >> 63);
+    return canon;
+}
+
+/*----------------------------------------------------------------------------
+| Encodes the canonical NaN `a' as a quadruple-precision NaN: `hi:lo' shifted
+| right by 16, then sign, all-ones exponent and `hi' bit 47 OR-ed in.
+*----------------------------------------------------------------------------*/
+
+inline float128 commonNaNToFloat128(commonNaNT a)
+{
+    float128 encoded;
+    shift128Right(a.hi, a.lo, 16, &encoded.hi, &encoded.lo);
+    encoded.hi |= UINT64_C(0x7FFF800000000000) | (((uint64_t) a.sign)<<63);
+    return encoded;
+}
+
+/*----------------------------------------------------------------------------
+| Takes two quadruple-precision floating-point values `a' and `b', one of
+| which is a NaN, and returns the appropriate NaN result.  If either is a
+| signaling NaN, the invalid exception is raised.  Defined out of line.
+*----------------------------------------------------------------------------*/
+
+float128 propagateFloat128NaN(float128 a, float128 b, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| The default generated quadruple-precision NaN (declaration only).
+*----------------------------------------------------------------------------*/
+extern const float128 float128_default_nan;
+
+#endif /* FLOAT128 */
+
+END_SOFTFLOAT_NS
+
+#endif
diff -r 372d3611c693 -r e89a7cf22da3 ext/softfloat/softfloat-specialize.cc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ext/softfloat/softfloat-specialize.cc	Thu Sep 19 18:09:15 2013 +0200
@@ -0,0 +1,197 @@
+/*============================================================================
+This C source fragment is part of the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+#define FLOAT128
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#include "softfloat.hh"
+#include "softfloat-specialize.hh"
+
+BEGIN_SOFTFLOAT_NS
+
+/*----------------------------------------------------------------------------
+| Takes two single-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+float32 propagateFloat32NaN(float32 a, float32 b, float_status_t &status)
+{
+    const int aNaN = float32_is_nan(a);
+    const int aSNaN = float32_is_signaling_nan(a);
+    const int bNaN = float32_is_nan(b);
+    const int bSNaN = float32_is_signaling_nan(b);
+    // The operands are classified as passed in; only afterwards is bit 22
+    // (0x00400000) forced on in both of them.
+    a |= 0x00400000;
+    b |= 0x00400000;
+    if (aSNaN | bSNaN) float_raise(status, float_flag_invalid);
+    if (get_float_nan_handling_mode(status) != float_larger_significand_nan)
+        return (aSNaN | aNaN) ? a : b;
+
+    if (aSNaN) {
+        if (! bSNaN) return bNaN ? b : a;
+    } else if (aNaN) {
+        if (bSNaN | ! bNaN) return a;
+    } else {
+        return b;
+    }
+
+    // Tie-break, reached when both operands were signaling or both were NaNs
+    // with neither signaling: larger (x<<1) wins, then the smaller raw value.
+    const uint32_t aMag = (uint32_t) (a<<1);
+    const uint32_t bMag = (uint32_t) (b<<1);
+    if (aMag < bMag) return b;
+    if (bMag < aMag) return a;
+    return (a < b) ? a : b;
+}
+
+/*----------------------------------------------------------------------------
+| Takes two double-precision floating-point values `a' and `b', one of which
+| is a NaN, and returns the appropriate NaN result.  If either `a' or `b' is a
+| signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+float64 propagateFloat64NaN(float64 a, float64 b, float_status_t &status)
+{
+    const int aNaN = float64_is_nan(a);
+    const int aSNaN = float64_is_signaling_nan(a);
+    const int bNaN = float64_is_nan(b);
+    const int bSNaN = float64_is_signaling_nan(b);
+    const uint64_t fractionMsb = UINT64_C(0x0008000000000000);  // bit 51
+    a |= fractionMsb;
+    b |= fractionMsb;
+    if (aSNaN | bSNaN) float_raise(status, float_flag_invalid);
+    if (get_float_nan_handling_mode(status) != float_larger_significand_nan)
+        return (aSNaN | aNaN) ? a : b;
+
+    if (aSNaN) {
+        if (! bSNaN) return bNaN ? b : a;
+    } else if (aNaN) {
+        if (bSNaN | ! bNaN) return a;
+    } else {
+        return b;
+    }
+
+    // Tie-break (both signaling, or both NaN and neither signaling).
+    const uint64_t aMag = (uint64_t) (a<<1);
+    const uint64_t bMag = (uint64_t) (b<<1);
+    if (aMag < bMag) return b;
+    if (bMag < aMag) return a;
+    return (a < b) ? a : b;
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Takes two extended double-precision floating-point values `a' and `b', one
+| of which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+floatx80 propagateFloatx80NaN(floatx80 a, floatx80 b, float_status_t &status)
+{
+    const int aNaN = floatx80_is_nan(a);
+    const int aSNaN = floatx80_is_signaling_nan(a);
+    const int bNaN = floatx80_is_nan(b);
+    const int bSNaN = floatx80_is_signaling_nan(b);
+    // Classification used the operands as passed in; now force bits 63 and 62.
+    a.fraction |= UINT64_C(0xC000000000000000);
+    b.fraction |= UINT64_C(0xC000000000000000);
+    if (aSNaN | bSNaN) float_raise(status, float_flag_invalid);
+
+    if (aSNaN) {
+        if (! bSNaN) return bNaN ? b : a;
+    } else if (aNaN) {
+        if (bSNaN | ! bNaN) return a;
+    } else {
+        return b;
+    }
+
+    // Tie-break: larger fraction wins; on equal fractions, the smaller exp.
+    if (a.fraction < b.fraction) return b;
+    if (b.fraction < a.fraction) return a;
+    return (a.exp < b.exp) ? a : b;
+}
+
+/*----------------------------------------------------------------------------
+| Default extended double-precision NaN (floatx80_default_nan_exp/_fraction).
+*----------------------------------------------------------------------------*/
+const floatx80 floatx80_default_nan =
+    packFloatx80(0, floatx80_default_nan_exp, floatx80_default_nan_fraction);
+
+#endif /* FLOATX80 */
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Takes two quadruple-precision floating-point values `a' and `b', one of
+| which is a NaN, and returns the appropriate NaN result.  If either `a' or
+| `b' is a signaling NaN, the invalid exception is raised.
+*----------------------------------------------------------------------------*/
+
+float128 propagateFloat128NaN(float128 a, float128 b, float_status_t &status)
+{
+    const int aNaN = float128_is_nan(a);
+    const int aSNaN = float128_is_signaling_nan(a);
+    const int bNaN = float128_is_nan(b);
+    const int bSNaN = float128_is_signaling_nan(b);
+    // Classification used the operands as passed in; now force bit 47 of hi.
+    a.hi |= UINT64_C(0x0000800000000000);
+    b.hi |= UINT64_C(0x0000800000000000);
+    if (aSNaN | bSNaN) float_raise(status, float_flag_invalid);
+
+    if (aSNaN) {
+        if (! bSNaN) return bNaN ? b : a;
+    } else if (aNaN) {
+        if (bSNaN | ! bNaN) return a;
+    } else {
+        return b;
+    }
+
+    // Tie-break (both signaling, or both NaN and neither signaling): the
+    // larger (hi<<1):lo wins; when those are equal, the smaller hi word.
+    if (lt128(a.hi<<1, a.lo, b.hi<<1, b.lo)) return b;
+    if (lt128(b.hi<<1, b.lo, a.hi<<1, a.lo)) return a;
+    return (a.hi < b.hi) ? a : b;
+}
+
+/*----------------------------------------------------------------------------
+| The pattern for a default generated quadruple-precision NaN.
+*----------------------------------------------------------------------------*/
+const float128 float128_default_nan =
+    packFloat128(float128_default_nan_hi, float128_default_nan_lo);
+
+#endif /* FLOAT128 */
+
+END_SOFTFLOAT_NS  // closed outside the FLOAT128 guard, like the unconditional BEGIN_SOFTFLOAT_NS
diff -r 372d3611c693 -r e89a7cf22da3 ext/softfloat/softfloat.hh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ext/softfloat/softfloat.hh	Thu Sep 19 18:09:15 2013 +0200
@@ -0,0 +1,450 @@
+/*============================================================================
+This C header file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#ifndef _SOFTFLOAT_HH_
+#define _SOFTFLOAT_HH_
+
+#include <cstdint>
+#include "softfloat_ns.hh"
+
+// Pull in whichever system header publishes the host byte order.
+#if defined(__linux__)
+#include <endian.h>
+#elif defined (__sun)
+#include <sys/isa_defs.h>
+#else
+#include <machine/endian.h>
+#endif
+// Use BYTE_ORDER when defined (undefined names compare as 0 == 0); otherwise use the bare, mutually exclusive _*_ENDIAN markers.
+#if defined(BYTE_ORDER) ? BYTE_ORDER == BIG_ENDIAN : (defined(_BIG_ENDIAN) && !defined(_LITTLE_ENDIAN))
+#define SOFTFLOAT_BIG_ENDIAN
+#elif defined(BYTE_ORDER) ? BYTE_ORDER == LITTLE_ENDIAN : (defined(_LITTLE_ENDIAN) && !defined(_BIG_ENDIAN))
+#define SOFTFLOAT_LITTLE_ENDIAN
+#else
+#error Cannot determine host byte order.
+#endif
+
+BEGIN_SOFTFLOAT_NS
+
+#define FLOAT16
+#define FLOATX80
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point types.
+*----------------------------------------------------------------------------*/
+#ifdef FLOAT16
+typedef uint16_t float16;
+#endif
+typedef uint32_t float32;
+typedef uint64_t float64;
+
+/*----------------------------------------------------------------------------
+| Value classes reported by the float*_class() routines declared below.
+*----------------------------------------------------------------------------*/
+enum float_class_t {
+    float_zero,
+    float_NaN,
+    float_negative_inf,
+    float_positive_inf,
+    float_denormal,
+    float_normalized
+};
+
+/*----------------------------------------------------------------------------
+| NaN operand handling mode: selects which NaN operand propagateFloat32NaN/64NaN return.
+*----------------------------------------------------------------------------*/
+enum float_nan_handling_mode_t {
+    float_larger_significand_nan = 0,	// larger-significand tie-break; this mode used by x87 FPU
+    float_first_operand_nan = 1		// return a if it is a NaN, else b; this mode used by SSE
+};
+
+/*----------------------------------------------------------------------------
+| Rounding modes; presumably the values held in float_status_t::float_rounding_mode (TODO confirm).
+*----------------------------------------------------------------------------*/
+enum float_round_t {
+    float_round_nearest_even = 0,
+    float_round_down         = 1,
+    float_round_up           = 2,
+    float_round_to_zero      = 3
+};
+
+/*----------------------------------------------------------------------------
+| Exception flag bits; float_raise() ORs them into float_status_t::float_exception_flags.
+*----------------------------------------------------------------------------*/
+enum float_exception_flag_t {
+    float_flag_invalid   = 0x01,
+    float_flag_denormal  = 0x02,
+    float_flag_divbyzero = 0x04,
+    float_flag_overflow  = 0x08,
+    float_flag_underflow = 0x10,
+    float_flag_inexact   = 0x20
+};
+
+#ifdef FLOATX80
+#define RAISE_SW_C1 0x0200 /* extra bit ORed into float_exception_flags by set_float_rounding_up() */
+#endif
+
+/*----------------------------------------------------------------------------
+| Ordering relations; presumably the int results of float*_compare[_quiet]() (TODO confirm).
+*----------------------------------------------------------------------------*/
+enum {
+    float_relation_less      = -1,
+    float_relation_equal     =  0,
+    float_relation_greater   =  1,
+    float_relation_unordered =  2
+};
+
+/*----------------------------------------------------------------------------
+| Options to indicate which negations to perform in float*_muladd().
+| Using these differs from negating an input or output before calling
+| the muladd function in that a NaN does not have its sign bit inverted
+| before it is propagated.
+*----------------------------------------------------------------------------*/
+enum {
+    float_muladd_negate_c       = 1,  // passed by float32_fmsub()/float64_fmsub()
+    float_muladd_negate_product = 2,  // passed by float32_fnmadd()/float64_fnmadd()
+    float_muladd_negate_result  = float_muladd_negate_c | float_muladd_negate_product  // both bits; passed by the fnmsub wrappers
+};
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point status structure, passed by reference to the routines below.
+*----------------------------------------------------------------------------*/
+struct float_status_t
+{
+#ifdef FLOATX80
+    int float_rounding_precision;	/* floatx80 only; read by get_float_rounding_precision() */
+#endif
+    int float_rounding_mode;  /* read by get_float_rounding_mode() */
+    int float_exception_flags;  /* bits accumulated by float_raise() and set_float_rounding_up() */
+    int float_exception_masks;  /* tested by float_exception_masked() */
+    int float_nan_handling_mode;	/* read by get_float_nan_handling_mode() */
+    int flush_underflow_to_zero;	/* read by get_flush_underflow_to_zero() */
+    int denormals_are_zeros;            /* read by get_denormals_are_zeros() */
+};
+
+/*----------------------------------------------------------------------------
+| ORs the exception bits in `flags' into status.float_exception_flags.
+*----------------------------------------------------------------------------*/
+
+inline void
+float_raise(float_status_t &status, int flags)
+{
+    status.float_exception_flags = status.float_exception_flags | flags;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the bits of `flag' that are also set in
+| status.float_exception_masks (zero when none of them is).
+*----------------------------------------------------------------------------*/
+inline int
+float_exception_masked(const float_status_t &status, int flag)
+{
+    return flag & status.float_exception_masks;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the float_rounding_mode field of `status'.
+*----------------------------------------------------------------------------*/
+inline int
+get_float_rounding_mode(const float_status_t &status)
+{
+    return status.float_rounding_mode;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the float_rounding_precision field of `status' (floatx80 only).
+*----------------------------------------------------------------------------*/
+#ifdef FLOATX80
+inline int
+get_float_rounding_precision(const float_status_t &status)
+{
+    return status.float_rounding_precision;
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the float_nan_handling_mode field of `status'.
+*----------------------------------------------------------------------------*/
+
+inline int
+get_float_nan_handling_mode(const float_status_t &status)
+{
+    return status.float_nan_handling_mode;
+}
+
+/*----------------------------------------------------------------------------
+| Sets the RAISE_SW_C1 bit in status.float_exception_flags (floatx80 only).
+| NOTE(review): presumably mirrors the x87 status-word C1 bit -- confirm.
+*----------------------------------------------------------------------------*/
+#ifdef FLOATX80
+inline void set_float_rounding_up(float_status_t &status)
+{
+    float_raise(status, RAISE_SW_C1);
+}
+#endif
+
+/*----------------------------------------------------------------------------
+| Returns the denormals_are_zeros field of `status' unchanged; a non-zero
+| value presumably means the <denormals-are-zeros> mode is enabled.
+*----------------------------------------------------------------------------*/
+inline int
+get_denormals_are_zeros(const float_status_t &status)
+{
+    return status.denormals_are_zeros;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the flush_underflow_to_zero field of `status' unchanged; a non-zero
+| value presumably means the <flush-underflow-to-zero> mode is enabled.
+*----------------------------------------------------------------------------*/
+inline int
+get_flush_underflow_to_zero(const float_status_t &status)
+{
+    return status.flush_underflow_to_zero;
+}
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+*----------------------------------------------------------------------------*/
+float32 int32_to_float32(int32_t, float_status_t &status);
+float64 int32_to_float64(int32_t);
+float32 int64_to_float32(int64_t, float_status_t &status);
+float64 int64_to_float64(int64_t, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32_t float32_to_int32(float32, float_status_t &status);
+int32_t float32_to_int32_round_to_zero(float32, float_status_t &status);
+int64_t float32_to_int64(float32, float_status_t &status);
+int64_t float32_to_int64_round_to_zero(float32, float_status_t &status);
+float64 float32_to_float64(float32, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE single-precision operations.
+*----------------------------------------------------------------------------*/
+float32 float32_round_to_int(float32, float_status_t &status);
+float32 float32_add(float32, float32, float_status_t &status);
+float32 float32_sub(float32, float32, float_status_t &status);
+float32 float32_mul(float32, float32, float_status_t &status);
+float32 float32_div(float32, float32, float_status_t &status);
+float32 float32_sqrt(float32, float_status_t &status);
+float32 float32_frc(float32, float_status_t &status);
+float32 float32_muladd(float32, float32, float32, int flags, float_status_t &status);
+
+// Forwards to float32_muladd() with flags 0 (no negation requested).
+inline float32 float32_fmadd(float32 a, float32 b, float32 c,
+                             float_status_t &status)
+{ return float32_muladd(a, b, c, 0, status); }
+
+// Forwards to float32_muladd() with float_muladd_negate_c.
+inline float32 float32_fmsub(float32 a, float32 b, float32 c,
+                             float_status_t &status)
+{ return float32_muladd(a, b, c, float_muladd_negate_c, status); }
+
+// Forwards to float32_muladd() with float_muladd_negate_product.
+inline float32 float32_fnmadd(float32 a, float32 b, float32 c,
+                              float_status_t &status)
+{ return float32_muladd(a, b, c, float_muladd_negate_product, status); }
+
+// Forwards to float32_muladd() with float_muladd_negate_result.
+inline float32 float32_fnmsub(float32 a, float32 b, float32 c,
+                              float_status_t &status)
+{ return float32_muladd(a, b, c, float_muladd_negate_result, status); }
+
+int float32_compare(float32, float32, float_status_t &status);
+int float32_compare_quiet(float32, float32, float_status_t &status);
+
+float_class_t float32_class(float32);
+int float32_is_signaling_nan(float32);
+int float32_is_nan(float32);
+int float32_is_denormal(float32);
+
+float32 float32_min(float32 a, float32 b, float_status_t &status);
+float32 float32_max(float32 a, float32 b, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+int32_t float64_to_int32(float64, float_status_t &status);
+int32_t float64_to_int32_round_to_zero(float64, float_status_t &status);
+int64_t float64_to_int64(float64, float_status_t &status);
+int64_t float64_to_int64_round_to_zero(float64, float_status_t &status);
+float32 float64_to_float32(float64, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE double-precision operations.
+*----------------------------------------------------------------------------*/
+float64 float64_round_to_int(float64, float_status_t &status);
+float64 float64_add(float64, float64, float_status_t &status);
+float64 float64_sub(float64, float64, float_status_t &status);
+float64 float64_mul(float64, float64, float_status_t &status);
+float64 float64_div(float64, float64, float_status_t &status);
+float64 float64_sqrt(float64, float_status_t &status);
+float64 float64_frc(float64, float_status_t &status);
+float64 float64_muladd(float64, float64, float64, int flags, float_status_t &status);
+
+// Forwards to float64_muladd() with flags 0 (no negation requested).
+inline float64 float64_fmadd(float64 a, float64 b, float64 c,
+                             float_status_t &status)
+{ return float64_muladd(a, b, c, 0, status); }
+
+// Forwards to float64_muladd() with float_muladd_negate_c.
+inline float64 float64_fmsub(float64 a, float64 b, float64 c,
+                             float_status_t &status)
+{ return float64_muladd(a, b, c, float_muladd_negate_c, status); }
+
+// Forwards to float64_muladd() with float_muladd_negate_product.
+inline float64 float64_fnmadd(float64 a, float64 b, float64 c,
+                              float_status_t &status)
+{ return float64_muladd(a, b, c, float_muladd_negate_product, status); }
+
+// Forwards to float64_muladd() with float_muladd_negate_result.
+inline float64 float64_fnmsub(float64 a, float64 b, float64 c,
+                              float_status_t &status)
+{ return float64_muladd(a, b, c, float_muladd_negate_result, status); }
+
+int float64_compare(float64, float64, float_status_t &status);
+int float64_compare_quiet(float64, float64, float_status_t &status);
+
+float_class_t float64_class(float64);
+int float64_is_signaling_nan(float64);
+int float64_is_nan(float64);
+int float64_is_denormal(float64);
+
+float64 float64_min(float64 a, float64 b, float_status_t &status);
+float64 float64_max(float64 a, float64 b, float_status_t &status);
+
+#ifdef FLOAT16
+float32 float16_to_float32(float16, float_status_t &status);
+float16 float32_to_float16(float32, float_status_t &status);
+
+float_class_t float16_class(float16);
+int float16_is_signaling_nan(float16);
+int float16_is_nan(float16);
+int float16_is_denormal(float16);
+#endif
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Extended double-precision type; member order depends on SOFTFLOAT_{BIG,LITTLE}_ENDIAN.
+*----------------------------------------------------------------------------*/
+
+#if defined(SOFTFLOAT_BIG_ENDIAN)
+struct floatx80 {	// leave alignment to compiler
+    uint16_t exp;       // 16-bit word; presumably sign + exponent (TODO confirm)
+    uint64_t fraction;  // 64-bit significand word
+};
+#elif defined(SOFTFLOAT_LITTLE_ENDIAN)
+struct floatx80 {
+    uint64_t fraction;  // 64-bit significand word
+    uint16_t exp;       // 16-bit word; presumably sign + exponent (TODO confirm)
+};
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE integer-to-floating-point conversion routines.
+*----------------------------------------------------------------------------*/
+floatx80 int32_to_floatx80(int32_t);
+floatx80 int64_to_floatx80(int64_t);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision conversion routines.
+*----------------------------------------------------------------------------*/
+floatx80 float32_to_floatx80(float32, float_status_t &status);
+floatx80 float64_to_floatx80(float64, float_status_t &status);
+
+int32_t floatx80_to_int32(floatx80, float_status_t &status);
+int32_t floatx80_to_int32_round_to_zero(floatx80, float_status_t &status);
+int64_t floatx80_to_int64(floatx80, float_status_t &status);
+int64_t floatx80_to_int64_round_to_zero(floatx80, float_status_t &status);
+
+float32 floatx80_to_float32(floatx80, float_status_t &status);
+float64 floatx80_to_float64(floatx80, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+*----------------------------------------------------------------------------*/
+floatx80 floatx80_round_to_int(floatx80, float_status_t &status);
+floatx80 floatx80_add(floatx80, floatx80, float_status_t &status);
+floatx80 floatx80_sub(floatx80, floatx80, float_status_t &status);
+floatx80 floatx80_mul(floatx80, floatx80, float_status_t &status);
+floatx80 floatx80_div(floatx80, floatx80, float_status_t &status);
+floatx80 floatx80_sqrt(floatx80, float_status_t &status);
+
+float_class_t floatx80_class(floatx80);
+int floatx80_is_signaling_nan(floatx80);
+int floatx80_is_nan(floatx80);
+
+#endif  /* FLOATX80 */
+
+#ifdef FLOAT128
+
+#if defined(SOFTFLOAT_BIG_ENDIAN)  // was BX_BIG_ENDIAN, a Bochs macro this header never defines
+struct float128 {
+    uint64_t hi, lo;  // high word first on big-endian hosts
+};
+#else
+struct float128 {
+    uint64_t lo, hi;  // low word first on little-endian hosts
+};
+#endif
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision conversion routines.
+*----------------------------------------------------------------------------*/
+float128 floatx80_to_float128(floatx80 a, float_status_t &status);
+floatx80 float128_to_floatx80(float128 a, float_status_t &status);
+
+float128 int64_to_float128(int64_t a);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+*----------------------------------------------------------------------------*/
+floatx80 floatx80_mul(floatx80 a, float128 b, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE quadruple-precision operations.
+*----------------------------------------------------------------------------*/
+float128 float128_add(float128 a, float128 b, float_status_t &status);
+float128 float128_sub(float128 a, float128 b, float_status_t &status);
+float128 float128_mul(float128 a, float128 b, float_status_t &status);
+float128 float128_div(float128 a, float128 b, float_status_t &status);
+
+#endif  /* FLOAT128 */
+
+END_SOFTFLOAT_NS
+
+#endif
diff -r 372d3611c693 -r e89a7cf22da3 ext/softfloat/softfloat.cc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ext/softfloat/softfloat.cc	Thu Sep 19 18:09:15 2013 +0200
@@ -0,0 +1,3208 @@
+/*============================================================================
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+#define FLOAT128
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#include "softfloat.hh"
+#include "softfloat-round-pack.hh"
+
+/*----------------------------------------------------------------------------
+| Primitive arithmetic functions, including multi-word arithmetic, and
+| division and square root approximations. (Can be specialized to target
+| if desired).
+*----------------------------------------------------------------------------*/
+#define USE_estimateDiv128To64
+#define USE_estimateSqrt32
+#include "softfloat-macros.hh"
+
+/*----------------------------------------------------------------------------
+| Functions and definitions to determine:  (1) whether tininess for underflow
+| is detected before or after rounding by default, (2) what (if anything)
+| happens when exceptions are raised, (3) how signaling NaNs are distinguished
+| from quiet NaNs, (4) the default generated quiet NaNs, and (5) how NaNs
+| are propagated from function inputs to output.  These details are target-
+| specific.
+*----------------------------------------------------------------------------*/
+#include "softfloat-specialize.hh"
+
+BEGIN_SOFTFLOAT_NS
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the single-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 int32_to_float32(int32_t a, float_status_t &status)
+{
+    if (! a) return 0;  /* zero converts to the all-zero encoding */
+    if (a == (int32_t) 0x80000000) return packFloat32(1, 0x9E, 0);  /* cannot be negated */
+    const int negative = (a < 0);
+    return normalizeRoundAndPackFloat32(negative, 0x9C, negative ? -a : a, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the double-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 int32_to_float64(int32_t a)
+{
+    if (a == 0) return 0;
+    int zSign = (a < 0);
+    /* Negate as unsigned: `-a' is signed overflow (undefined) for INT32_MIN. */
+    uint32_t absA = zSign ? (0u - (uint32_t) a) : (uint32_t) a;
+    int shiftCount = countLeadingZeros32(absA) + 21;
+    return packFloat64(zSign, 0x432 - shiftCount, ((uint64_t) absA)<<shiftCount);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the single-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 int64_to_float32(int64_t a, float_status_t &status)
+{
+    if (a == 0) return 0;
+    int zSign = (a < 0);
+    /* Negate in unsigned arithmetic: `-a' is signed overflow (undefined
+       behaviour) when a is INT64_MIN. */
+    uint64_t absA = zSign ? (UINT64_C(0) - (uint64_t) a) : (uint64_t) a;
+    int shiftCount = countLeadingZeros64(absA) - 40;
+    if (0 <= shiftCount) {
+        /* At least 40 leading zeros: the value fits in 24 bits and is exact. */
+        return packFloat32(zSign, 0x95 - shiftCount, (uint32_t)(absA<<shiftCount));
+    }
+    /* Otherwise align the magnitude for rounding to single precision. */
+    shiftCount += 7;
+    if (shiftCount < 0) {
+        absA = shift64RightJamming(absA, -shiftCount);
+    }
+    else absA <<= shiftCount;
+    return roundAndPackFloat32(zSign, 0x9C - shiftCount, (uint32_t) absA, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the double-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 int64_to_float64(int64_t a, float_status_t &status)
+{
+    if (! a) return 0;
+    /* The most negative value cannot be negated, so it is packed directly. */
+    if (a == (int64_t) UINT64_C(0x8000000000000000))
+        return packFloat64(1, 0x43E, 0);
+    const int negative = (a < 0);
+    return normalizeRoundAndPackFloat64(negative, 0x43C, negative ? -a : a, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic - which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN or the
+| conversion overflows, the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+int32_t float32_to_int32(float32 a, float_status_t &status)
+{
+    uint32_t frac = extractFloat32Frac(a);
+    const int16_t exponent = extractFloat32Exp(a);
+    int sign = extractFloat32Sign(a);
+
+    if ((exponent == 0xFF) && frac) sign = 0;  /* NaN: the sign is dropped */
+    if (exponent) frac |= 0x00800000;  /* restore the implicit leading bit */
+    else if (get_denormals_are_zeros(status)) frac = 0;
+
+    /* Widen the significand to 64 bits and align it for rounding. */
+    uint64_t sig64 = ((uint64_t) frac)<<32;
+    const int shiftCount = 0xAF - exponent;
+    if (0 < shiftCount) sig64 = shift64RightJamming(sig64, shiftCount);
+    return roundAndPackInt32(sign, sig64, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN or the conversion overflows, the integer indefinite
+| value is returned.
+*----------------------------------------------------------------------------*/
+
+int32_t float32_to_int32_round_to_zero(float32 a, float_status_t &status)
+{
+    uint32_t frac = extractFloat32Frac(a);
+    const int16_t exponent = extractFloat32Exp(a);
+    const int negative = extractFloat32Sign(a);
+    const int shiftCount = exponent - 0x9E;
+
+    /* Exponent too large (also infinity/NaN): return the indefinite value.
+       Only the bit pattern 0xCF000000 (-2^31) does so without raising invalid. */
+    if (shiftCount >= 0) {
+        if (a != 0xCF000000) float_raise(status, float_flag_invalid);
+        return (int32_t)(int32_indefinite);
+    }
+
+    /* Exponent <= 0x7E (|a| < 1): truncates to 0, inexact unless a is zero. */
+    if (exponent <= 0x7E) {
+        if (get_denormals_are_zeros(status) && exponent == 0) frac = 0;
+        if (exponent | frac) float_raise(status, float_flag_inexact);
+        return 0;
+    }
+
+    /* Left-justify the significand with its implicit bit, then split it
+       into the integer part and the discarded fraction bits. */
+    frac = (frac | 0x00800000)<<8;
+    int32_t z = frac>>(-shiftCount);
+    if ((uint32_t) (frac<<(shiftCount & 31))) {
+        float_raise(status, float_flag_inexact);
+    }
+    return negative ? -z : z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic - which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN or the
+| conversion overflows, the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+int64_t float32_to_int64(float32 a, float_status_t &status)
+{
+    uint64_t sig64, sigExtra;
+    uint32_t frac = extractFloat32Frac(a);
+    const int16_t exponent = extractFloat32Exp(a);
+    const int sign = extractFloat32Sign(a);
+    /* Exponents above 0xBE, which include infinity and NaN, are invalid. */
+    const int shiftCount = 0xBE - exponent;
+    if (shiftCount < 0) {
+        float_raise(status, float_flag_invalid);
+        return (int64_t)(int64_indefinite);
+    }
+
+    if (exponent) frac |= 0x00800000;  /* restore the implicit leading bit */
+    else if (get_denormals_are_zeros(status)) frac = 0;
+
+    /* Left-justify the 24-bit significand in 64 bits and shift it into
+       place; sigExtra is filled in by the helper and used for rounding. */
+    sig64 = ((uint64_t) frac)<<40;
+    shift64ExtraRightJamming(sig64, 0, shiftCount, &sig64, &sigExtra);
+    return roundAndPackInt64(sign, sig64, sigExtra, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN or the conversion overflows, the integer indefinite
+| value is returned.
+*----------------------------------------------------------------------------*/
+
+int64_t float32_to_int64_round_to_zero(float32 a, float_status_t &status)
+{
+    uint32_t frac = extractFloat32Frac(a);
+    const int16_t exponent = extractFloat32Exp(a);
+    const int negative = extractFloat32Sign(a);
+    const int shiftCount = exponent - 0xBE;
+
+    /* Exponent too large (also infinity/NaN): return the indefinite value.
+       Only the bit pattern 0xDF000000 (-2^63) does so without raising
+       the invalid flag. */
+    if (shiftCount >= 0) {
+        if (a != 0xDF000000) float_raise(status, float_flag_invalid);
+        return (int64_t)(int64_indefinite);
+    }
+
+    /* Exponent <= 0x7E (|a| < 1): truncates to 0, inexact unless a is zero. */
+    if (exponent <= 0x7E) {
+        if (get_denormals_are_zeros(status) && exponent == 0) frac = 0;
+        if (exponent | frac) float_raise(status, float_flag_inexact);
+        return 0;
+    }
+
+    /* Left-justify the significand with its implicit bit in 64 bits, then
+       split it into the integer part and the discarded fraction bits. */
+    const uint64_t sig64 = ((uint64_t) (frac | 0x00800000))<<40;
+    int64_t z = sig64>>(-shiftCount);
+    if ((uint64_t) (sig64<<(shiftCount & 63))) {
+        float_raise(status, float_flag_inexact);
+    }
+    if (negative) z = -z;
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the double-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float32_to_float64(float32 a, float_status_t &status)
+{
+    uint32_t frac = extractFloat32Frac(a);
+    int16_t exponent = extractFloat32Exp(a);
+    const int sign = extractFloat32Sign(a);
+    if (exponent == 0xFF) {  /* infinity or NaN */
+        if (frac == 0) return packFloat64(sign, 0x7FF, 0);
+        return commonNaNToFloat64(float32ToCommonNaN(a, status));
+    }
+    if (exponent == 0) {  /* zero or denormal */
+        if (get_denormals_are_zeros(status)) frac = 0;
+        if (frac == 0) return packFloat64(sign, 0, 0);
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(frac, &exponent, &frac);
+        --exponent;
+    }
+    /* Rebias the exponent and move the 23-bit fraction up into 52 bits. */
+    return packFloat64(sign, exponent + 0x380, ((uint64_t) frac)<<29);
+}
+
+/*----------------------------------------------------------------------------
+| Rounds the single-precision floating-point value `a' to an integer, and
+| returns the result as a single-precision floating-point value.  The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_round_to_int(float32 a, float_status_t &status)
+{
+    const int roundingMode = get_float_rounding_mode(status);
+    const int16_t exponent = extractFloat32Exp(a);
+
+    /* Exponent >= 0x96: already integral; only a NaN needs processing. */
+    if (exponent >= 0x96) {
+        if ((exponent == 0xFF) && extractFloat32Frac(a)) {
+            return propagateFloat32NaN(a, status);
+        }
+        return a;
+    }
+
+    if (get_denormals_are_zeros(status)) a = float32_denormal_to_zero(a);
+
+    /* Exponent <= 0x7E (|a| < 1): the result is a signed zero or one. */
+    if (exponent <= 0x7E) {
+        if ((uint32_t) (a<<1) == 0) return a;
+        float_raise(status, float_flag_inexact);
+        const int negative = extractFloat32Sign(a);
+        if (roundingMode == float_round_nearest_even) {
+            if ((exponent == 0x7E) && extractFloat32Frac(a)) {
+                return packFloat32(negative, 0x7F, 0);
+            }
+        }
+        else if (roundingMode == float_round_down) {
+            return negative ? 0xBF800000 : 0;
+        }
+        else if (roundingMode == float_round_up) {
+            return negative ? 0x80000000 : 0x3F800000;
+        }
+        return packFloat32(negative, 0, 0);
+    }
+
+    /* Masks for the units bit and for the fraction bits below it. */
+    const uint32_t lastBitMask = ((uint32_t) 1)<<(0x96 - exponent);
+    const uint32_t roundBitsMask = lastBitMask - 1;
+    float32 z = a;
+    if (roundingMode == float_round_nearest_even) {
+        z += lastBitMask>>1;
+        if ((z & roundBitsMask) == 0) z &= ~lastBitMask;  /* tie: round to even */
+    }
+    else if (roundingMode != float_round_to_zero) {
+        if (extractFloat32Sign(z) ^ (roundingMode == float_round_up)) {
+            z += roundBitsMask;  /* rounding away from zero */
+        }
+    }
+    z &= ~roundBitsMask;
+    if (z != a) float_raise(status, float_flag_inexact);
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the fractional portion of single-precision floating-point value `a',
+| and returns the result  as a  single-precision  floating-point value. The
+| fractional results are precise. The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_frc(float32 a, float_status_t &status)
+{
+    const int roundingMode = get_float_rounding_mode(status);
+    /* A zero result is negative when rounding down and positive otherwise. */
+    const int zeroSign = (roundingMode == float_round_down);
+
+    int16_t exponent = extractFloat32Exp(a);
+    uint32_t frac = extractFloat32Frac(a);
+    const int sign = extractFloat32Sign(a);
+
+    if (exponent == 0xFF) {
+        if (frac) return propagateFloat32NaN(a, status);
+        float_raise(status, float_flag_invalid);  /* infinity operand */
+        return float32_default_nan;
+    }
+
+    /* Exponent >= 0x96: a is an integer, so no fraction remains. */
+    if (exponent >= 0x96) return packFloat32(zeroSign, 0, 0);
+
+    /* Exponent < 0x7F (|a| < 1): a is returned as is, bar denormal handling. */
+    if (exponent < 0x7F) {
+        if (exponent == 0) {
+            if (get_denormals_are_zeros(status)) frac = 0;
+            if (frac == 0) return packFloat32(zeroSign, 0, 0);
+
+            float_raise(status, float_flag_denormal);
+            if (! float_exception_masked(status, float_flag_underflow))
+                float_raise(status, float_flag_underflow);
+
+            if (get_flush_underflow_to_zero(status)) {
+                float_raise(status, float_flag_underflow | float_flag_inexact);
+                return packFloat32(sign, 0, 0);
+            }
+        }
+        return a;
+    }
+
+    /* Keep only the fraction-field bits below the binary point and move
+       them up ahead of normalization. */
+    const uint32_t lastBitMask = 1 << (0x96 - exponent);
+    const uint32_t roundBitsMask = lastBitMask - 1;
+    frac &= roundBitsMask;
+    frac <<= 7;
+    exponent--;
+
+    if (frac == 0) return packFloat32(zeroSign, 0, 0);
+
+    return normalizeRoundAndPackFloat32(sign, exponent, frac, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the single-precision
+| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
+| before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 addFloat32Sigs(float32 a, float32 b, int zSign, float_status_t &status)
+{
+    int16_t aExp, bExp, zExp;
+    uint32_t aSig, bSig, zSig;
+    int16_t expDiff;
+    /* Split both operands into fraction and exponent fields. */
+    aSig = extractFloat32Frac(a);
+    aExp = extractFloat32Exp(a);
+    bSig = extractFloat32Frac(b);
+    bExp = extractFloat32Exp(b);
+    /* With denormals-are-zeros set, a zero exponent field means zero. */
+    if (get_denormals_are_zeros(status)) {
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+    }
+    /* The 6-bit left shift leaves low-order bits free below the fraction. */
+    expDiff = aExp - bExp;
+    aSig <<= 6;
+    bSig <<= 6;
+    /* Case 1: a has the larger exponent; b is shifted right to align. */
+    if (0 < expDiff) {
+        if (aExp == 0xFF) {  /* a is infinity or NaN */
+            if (aSig) return propagateFloat32NaN(a, b, status);
+            if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+            return a;
+        }
+        if ((aExp == 0) && aSig)  /* NOTE(review): aExp > bExp here, looks unreachable -- confirm */
+            float_raise(status, float_flag_denormal);
+        /* A denormal b gets no implicit bit and is shifted one place less. */
+        if (bExp == 0) {
+            if (bSig) float_raise(status, float_flag_denormal);
+            --expDiff;
+        }
+        else bSig |= 0x20000000;  /* implicit bit: bit 23 moved up by 6 */
+
+        bSig = shift32RightJamming(bSig, expDiff);
+        zExp = aExp;
+    }
+    else if (expDiff < 0) {  /* Case 2: mirror image, b has the larger exponent */
+        if (bExp == 0xFF) {  /* b is infinity or NaN */
+            if (bSig) return propagateFloat32NaN(a, b, status);
+            if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+            return packFloat32(zSign, 0xFF, 0);
+        }
+        if ((bExp == 0) && bSig)  /* NOTE(review): bExp > aExp here, looks unreachable -- confirm */
+            float_raise(status, float_flag_denormal);
+
+        if (aExp == 0) {
+            if (aSig) float_raise(status, float_flag_denormal);
+            ++expDiff;
+        }
+        else aSig |= 0x20000000;
+
+        aSig = shift32RightJamming(aSig, -expDiff);
+        zExp = bExp;
+    }
+    else {  /* Case 3: equal exponents, no alignment shift needed */
+        if (aExp == 0xFF) {
+            if (aSig | bSig) return propagateFloat32NaN(a, b, status);
+            return a;
+        }
+        if (aExp == 0) {  /* both zero or denormal: add the raw fractions */
+            zSig = (aSig + bSig) >> 6;
+            if (aSig | bSig) {
+                float_raise(status, float_flag_denormal);
+                if (get_flush_underflow_to_zero(status)) {
+                    float_raise(status, float_flag_underflow | float_flag_inexact);
+                    return packFloat32(zSign, 0, 0);
+                }
+                if (! float_exception_masked(status, float_flag_underflow)) {
+                    if (extractFloat32Frac(zSig) == zSig)  /* presumably: sum still fits the fraction field */
+                        float_raise(status, float_flag_underflow);
+                }
+            }
+            return packFloat32(zSign, 0, zSig);
+        }
+        zSig = 0x40000000 + aSig + bSig;  /* 0x40000000 is both implicit bits (2 * 0x20000000) */
+        return roundAndPackFloat32(zSign, aExp, zSig, status);
+    }
+    aSig |= 0x20000000;  /* shared tail of cases 1 and 2: contributes the remaining implicit bit */
+    zSig = (aSig + bSig)<<1;  /* first try the sum moved up one bit, with a matching exponent */
+    --zExp;
+    if ((int32_t) zSig < 0) {  /* top bit set: fall back to the unshifted sum */
+        zSig = aSig + bSig;
+        ++zExp;
+    }
+    return roundAndPackFloat32(zSign, zExp, zSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the single-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float32 subFloat32Sigs(float32 a, float32 b, int zSign, float_status_t &status)
+{
+    int16_t aExp, bExp, zExp;
+    uint32_t aSig, bSig, zSig;
+    int16_t expDiff;
+    /* Split both operands into fraction and exponent fields. */
+    aSig = extractFloat32Frac(a);
+    aExp = extractFloat32Exp(a);
+    bSig = extractFloat32Frac(b);
+    bExp = extractFloat32Exp(b);
+    /* With denormals-are-zeros set, a zero exponent field means zero. */
+    if (get_denormals_are_zeros(status)) {
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+    }
+    /* The 7-bit left shift leaves low-order bits free below the fraction. */
+    expDiff = aExp - bExp;
+    aSig <<= 7;
+    bSig <<= 7;
+    if (0 < expDiff) goto aExpBigger;
+    if (expDiff < 0) goto bExpBigger;
+    if (aExp == 0xFF) {  /* equal exponents, both infinity or NaN */
+        if (aSig | bSig) return propagateFloat32NaN(a, b, status);
+        float_raise(status, float_flag_invalid);  /* two infinities */
+        return float32_default_nan;
+    }
+    if (aExp == 0) {  /* both zero or denormal: exponent 1 is used for both */
+        if (aSig | bSig) float_raise(status, float_flag_denormal);
+        aExp = 1;
+        bExp = 1;
+    }
+    if (bSig < aSig) goto aBigger;
+    if (aSig < bSig) goto bBigger;
+    return packFloat32(get_float_rounding_mode(status) == float_round_down, 0, 0);  /* equal: zero, negative only when rounding down */
+ bExpBigger:
+    if (bExp == 0xFF) {  /* b is infinity or NaN */
+        if (bSig) return propagateFloat32NaN(a, b, status);
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloat32(zSign ^ 1, 0xFF, 0);  /* infinity with the sign flipped */
+    }
+    if ((bExp == 0) && bSig)  /* NOTE(review): bExp > aExp here, looks unreachable -- confirm */
+        float_raise(status, float_flag_denormal);
+    /* A denormal a gets no implicit bit and is shifted one place less. */
+    if (aExp == 0) {
+        if (aSig) float_raise(status, float_flag_denormal);
+        ++expDiff;
+    }
+    else aSig |= 0x40000000;  /* implicit bit: bit 23 moved up by 7 */
+
+    aSig = shift32RightJamming(aSig, -expDiff);
+    bSig |= 0x40000000;
+ bBigger:
+    zSig = bSig - aSig;
+    zExp = bExp;
+    zSign ^= 1;  /* b dominates, so the result sign is flipped */
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if (aExp == 0xFF) {  /* a is infinity or NaN */
+        if (aSig) return propagateFloat32NaN(a, b, status);
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        return a;
+    }
+    if ((aExp == 0) && aSig)  /* NOTE(review): aExp > bExp here, looks unreachable -- confirm */
+        float_raise(status, float_flag_denormal);
+    /* Mirror image of the alignment performed under bExpBigger. */
+    if (bExp == 0) {
+        if (bSig) float_raise(status, float_flag_denormal);
+        --expDiff;
+    }
+    else bSig |= 0x40000000;
+
+    bSig = shift32RightJamming(bSig, expDiff);
+    aSig |= 0x40000000;
+ aBigger:
+    zSig = aSig - bSig;
+    zExp = aExp;
+ normalizeRoundAndPack:
+    --zExp;
+    return normalizeRoundAndPackFloat32(zSign, zExp, zSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the single-precision floating-point values `a'
+| and `b'.  The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_add(float32 a, float32 b, float_status_t &status)
+{
+    const int aSign = extractFloat32Sign(a);
+
+    /* Equal signs: the magnitudes add.  Opposite signs: they subtract.  In
+       both cases the sign of `a' is handed to the helper. */
+    if (aSign != extractFloat32Sign(b)) {
+        return subFloat32Sigs(a, b, aSign, status);
+    }
+
+    return addFloat32Sigs(a, b, aSign, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the single-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_sub(float32 a, float32 b, float_status_t &status)
+{
+    const int aSign = extractFloat32Sign(a);
+
+    /* Opposite signs: the magnitudes add.  Equal signs: they subtract.  In
+       both cases the sign of `a' is handed to the helper. */
+    if (aSign != extractFloat32Sign(b)) {
+        return addFloat32Sigs(a, b, aSign, status);
+    }
+
+    return subFloat32Sigs(a, b, aSign, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the single-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_mul(float32 a, float32 b, float_status_t &status)
+{
+    uint32_t aSig = extractFloat32Frac(a);
+    int16_t aExp = extractFloat32Exp(a);
+    const int aSign = extractFloat32Sign(a);
+    uint32_t bSig = extractFloat32Frac(b);
+    int16_t bExp = extractFloat32Exp(b);
+    const int bSign = extractFloat32Sign(b);
+    const int zSign = aSign ^ bSign;  /* sign of the product */
+
+    /* With denormals-are-zeros set, a zero exponent field means zero. */
+    if (get_denormals_are_zeros(status)) {
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+    }
+
+    if (aExp == 0xFF) {  /* a is infinity or NaN */
+        const int bIsNaN = (bExp == 0xFF) && bSig;
+        if (aSig || bIsNaN) return propagateFloat32NaN(a, b, status);
+
+        if ((bExp | bSig) == 0) {  /* infinity times zero */
+            float_raise(status, float_flag_invalid);
+            return float32_default_nan;
+        }
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloat32(zSign, 0xFF, 0);
+    }
+
+    if (bExp == 0xFF) {  /* b is infinity or NaN; a is finite here */
+        if (bSig) return propagateFloat32NaN(a, b, status);
+        if ((aExp | aSig) == 0) {  /* zero times infinity */
+            float_raise(status, float_flag_invalid);
+            return float32_default_nan;
+        }
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloat32(zSign, 0xFF, 0);
+    }
+
+    if (aExp == 0) {  /* a is zero or denormal */
+        if (aSig == 0) {
+            if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+            return packFloat32(zSign, 0, 0);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(aSig, &aExp, &aSig);
+    }
+
+    if (bExp == 0) {  /* b is zero or denormal */
+        if (bSig == 0) return packFloat32(zSign, 0, 0);
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(bSig, &bExp, &bSig);
+    }
+
+    /* Multiply the significands (implicit bits restored) and keep the upper
+       32 bits of the 64-bit product. */
+    int16_t zExp = aExp + bExp - 0x7F;
+    aSig = (aSig | 0x00800000)<<7;
+    bSig = (bSig | 0x00800000)<<8;
+    uint32_t zSig = (uint32_t) shift64RightJamming(((uint64_t) aSig) * bSig, 32);
+    if (0 <= (int32_t) (zSig<<1)) {  /* bit 30 clear: move up one place */
+        zSig <<= 1;
+        --zExp;
+    }
+    return roundAndPackFloat32(zSign, zExp, zSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the single-precision floating-point value `a'
+| by the corresponding value `b'.  The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_div(float32 a, float32 b, float_status_t &status)
+{
+    uint32_t aSig = extractFloat32Frac(a);
+    int16_t aExp = extractFloat32Exp(a);
+    const int aSign = extractFloat32Sign(a);
+    uint32_t bSig = extractFloat32Frac(b);
+    int16_t bExp = extractFloat32Exp(b);
+    const int bSign = extractFloat32Sign(b);
+    const int zSign = aSign ^ bSign;  /* sign of the quotient */
+
+    /* With denormals-are-zeros set, a zero exponent field means zero. */
+    if (get_denormals_are_zeros(status)) {
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+    }
+
+    if (aExp == 0xFF) {  /* dividend is infinity or NaN */
+        if (aSig) return propagateFloat32NaN(a, b, status);
+        if (bExp == 0xFF) {
+            if (bSig) return propagateFloat32NaN(a, b, status);
+            float_raise(status, float_flag_invalid);  /* infinity / infinity */
+            return float32_default_nan;
+        }
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloat32(zSign, 0xFF, 0);
+    }
+
+    if (bExp == 0xFF) {  /* divisor is infinity or NaN; dividend is finite */
+        if (bSig) return propagateFloat32NaN(a, b, status);
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloat32(zSign, 0, 0);
+    }
+
+    if (bExp == 0) {  /* divisor is zero or denormal */
+        if (bSig == 0) {
+            if ((aExp | aSig) == 0) {  /* zero / zero */
+                float_raise(status, float_flag_invalid);
+                return float32_default_nan;
+            }
+            float_raise(status, float_flag_divbyzero);
+            return packFloat32(zSign, 0xFF, 0);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(bSig, &bExp, &bSig);
+    }
+    if (aExp == 0) {  /* dividend is zero or denormal */
+        if (aSig == 0) return packFloat32(zSign, 0, 0);
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(aSig, &aExp, &aSig);
+    }
+
+    int16_t zExp = aExp - bExp + 0x7D;
+    aSig = (aSig | 0x00800000)<<7;
+    bSig = (bSig | 0x00800000)<<8;
+    if (bSig <= (aSig + aSig)) {  /* keeps the quotient below 2^31 */
+        aSig >>= 1;
+        ++zExp;
+    }
+    const uint64_t dividend = ((uint64_t) aSig)<<32;
+    uint32_t zSig = dividend / bSig;
+    /* Low six bits clear: set the lowest bit unless the division was exact. */
+    if ((zSig & 0x3F) == 0) zSig |= ((uint64_t) bSig * zSig != dividend);
+    return roundAndPackFloat32(zSign, zExp, zSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the single-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_sqrt(float32 a, float_status_t &status)
+{
+    int aSign;
+    int16_t aExp, zExp;
+    uint32_t aSig, zSig;
+    uint64_t rem, term;
+    /* Split the operand into fraction, exponent and sign fields. */
+    aSig = extractFloat32Frac(a);
+    aExp = extractFloat32Exp(a);
+    aSign = extractFloat32Sign(a);
+    /* NaN propagates, +infinity is returned unchanged, -infinity is invalid. */
+    if (aExp == 0xFF) {
+        if (aSig) return propagateFloat32NaN(a, status);
+        if (! aSign) return a;
+        float_raise(status, float_flag_invalid);
+        return float32_default_nan;
+    }
+    /* With denormals-are-zeros set, a zero exponent field means zero. */
+    if (get_denormals_are_zeros(status)) {
+        if (aExp == 0) aSig = 0;
+    }
+    /* Negative operands: -0 yields -0, anything else is invalid. */
+    if (aSign) {
+        if ((aExp | aSig) == 0) return packFloat32(aSign, 0, 0);
+        float_raise(status, float_flag_invalid);
+        return float32_default_nan;
+    }
+    if (aExp == 0) {  /* +0 or a positive denormal */
+        if (aSig == 0) return 0;
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(aSig, &aExp, &aSig);
+    }
+    zExp = ((aExp - 0x7F)>>1) + 0x7E;  /* halve the unbiased exponent, then re-bias */
+    aSig = (aSig | 0x00800000)<<8;  /* implicit bit restored, left-justified in 32 bits */
+    zSig = estimateSqrt32(aExp, aSig) + 2;
+    if ((zSig & 0x7F) <= 5) {  /* presumably only here can the estimate's error affect rounding -- confirm */
+        if (zSig < 2) {  /* the `+ 2' above wrapped around 32 bits */
+            zSig = 0x7FFFFFFF;
+            goto roundAndPack;
+        }
+        aSig >>= aExp & 1;  /* one-bit shift when the exponent field is odd */
+        term = ((uint64_t) zSig) * zSig;
+        rem = (((uint64_t) aSig)<<32) - term;
+        while ((int64_t) rem < 0) {  /* estimate too large: step down, using z^2 - (z-1)^2 = 2(z-1) + 1 */
+            --zSig;
+            rem += (((uint64_t) zSig)<<1) | 1;
+        }
+        zSig |= (rem != 0);  /* a nonzero remainder sets the lowest bit */
+    }
+    zSig = shift32RightJamming(zSig, 1);
+ roundAndPack:
+    return roundAndPackFloat32(0, zExp, zSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Determine single-precision floating-point number class
+*----------------------------------------------------------------------------*/
+
+float_class_t float32_class(float32 a)
+{
+   const int16_t exponent = extractFloat32Exp(a);
+   const uint32_t frac = extractFloat32Frac(a);
+   const int negative = extractFloat32Sign(a);
+
+   switch (exponent) {
+   case 0xFF:
+       /* All-ones exponent: infinity if the fraction is clear, else NaN. */
+       if (frac != 0) return float_NaN;
+       return negative ? float_negative_inf : float_positive_inf;
+
+   case 0:
+       /* Zero exponent: zero if the fraction is clear, else denormal. */
+       return (frac == 0) ? float_zero : float_denormal;
+
+   default:
+       return float_normalized;
+   }
+}
+
+/*----------------------------------------------------------------------------
+| Compare  between  two  single  precision  floating  point  numbers. Returns
+| 'float_relation_equal'  if the operands are equal, 'float_relation_less' if
+| the    value    'a'   is   less   than   the   corresponding   value   `b',
+| 'float_relation_greater' if the value 'a' is greater than the corresponding
+| value `b', or 'float_relation_unordered' otherwise.
+*----------------------------------------------------------------------------*/
+
+int float32_compare(float32 a, float32 b, float_status_t &status)
+{
+    if (get_denormals_are_zeros(status)) {
+        a = float32_denormal_to_zero(a);
+        b = float32_denormal_to_zero(b);
+    }
+
+    const float_class_t classA = float32_class(a);
+    const float_class_t classB = float32_class(b);
+
+    // Any NaN operand, quiet or signaling, makes the comparison invalid.
+    if (classA == float_NaN || classB == float_NaN) {
+        float_raise(status, float_flag_invalid);
+        return float_relation_unordered;
+    }
+    if (classA == float_denormal || classB == float_denormal)
+        float_raise(status, float_flag_denormal);
+
+    // Identical encodings are equal, and so are +0 and -0 (sign shifted out).
+    const bool bothZero = ((uint32_t) ((a | b)<<1) == 0);
+    if ((a == b) || bothZero) return float_relation_equal;
+
+    const int signA = extractFloat32Sign(a);
+    const int signB = extractFloat32Sign(b);
+    if (signA != signB)
+        return (signA) ? float_relation_less : float_relation_greater;
+
+    // Same sign: raw ordering holds for positives and flips for negatives.
+    return (signA ^ (a < b)) ? float_relation_less : float_relation_greater;
+}
+
+/*----------------------------------------------------------------------------
+| Compare  between  two  single  precision  floating  point  numbers. Returns
+| 'float_relation_equal'  if the operands are equal, 'float_relation_less' if
+| the    value    'a'   is   less   than   the   corresponding   value   `b',
+| 'float_relation_greater' if the value 'a' is greater than the corresponding
+| value `b', or 'float_relation_unordered' otherwise. Quiet NaNs do not cause
+| an exception.
+*----------------------------------------------------------------------------*/
+
+int float32_compare_quiet(float32 a, float32 b, float_status_t &status)
+{
+    // In denormals-are-zeros mode both operands are flushed before use.
+    if (get_denormals_are_zeros(status)) {
+        a = float32_denormal_to_zero(a);
+        b = float32_denormal_to_zero(b);
+    }
+
+    const float_class_t classA = float32_class(a);
+    const float_class_t classB = float32_class(b);
+
+    // NaN operands compare unordered; only a signaling NaN raises invalid.
+    if (classA == float_NaN || classB == float_NaN) {
+        const bool signaling =
+            float32_is_signaling_nan(a) || float32_is_signaling_nan(b);
+        if (signaling) float_raise(status, float_flag_invalid);
+        return float_relation_unordered;
+    }
+
+    if (classA == float_denormal || classB == float_denormal)
+        float_raise(status, float_flag_denormal);
+
+    // Identical encodings are equal, and so are +0 and -0 (sign shifted out).
+    const bool bothZero = ((uint32_t) ((a | b)<<1) == 0);
+    if ((a == b) || bothZero) return float_relation_equal;
+
+    const int signA = extractFloat32Sign(a);
+    const int signB = extractFloat32Sign(b);
+    if (signA != signB)
+        return (signA) ? float_relation_less : float_relation_greater;
+
+    // Same sign: raw ordering holds for positives and flips for negatives.
+    return (signA ^ (a < b)) ? float_relation_less : float_relation_greater;
+}
+
+/*----------------------------------------------------------------------------
+| Compare between two single precision floating point numbers and return the
+| smaller of  them.  The operation  is performed  according to  the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_min(float32 a, float32 b, float_status_t &status)
+{
+    const bool flushDenormals = get_denormals_are_zeros(status);
+    if (flushDenormals) a = float32_denormal_to_zero(a);
+    if (flushDenormals) b = float32_denormal_to_zero(b);
+    // Every relation other than "less" (including unordered) selects b.
+    const int relation = float32_compare(a, b, status);
+    return (relation == float_relation_less) ? a : b;
+}
+
+/*----------------------------------------------------------------------------
+| Compare between two single precision floating point numbers and return the
+| larger  of  them.  The operation  is performed  according to  the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float32_max(float32 a, float32 b, float_status_t &status)
+{
+    const bool flushDenormals = get_denormals_are_zeros(status);
+    if (flushDenormals) a = float32_denormal_to_zero(a);
+    if (flushDenormals) b = float32_denormal_to_zero(b);
+    // Every relation other than "greater" (including unordered) selects b.
+    const int relation = float32_compare(a, b, status);
+    return (relation == float_relation_greater) ? a : b;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic - which means in particular that the conversion is rounded
+| according to the current rounding mode. If `a' is a NaN or the
+| conversion overflows, the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+int32_t float64_to_int32(float64 a, float_status_t &status)
+{
+    uint64_t sig = extractFloat64Frac(a);
+    const int16_t biasedExp = extractFloat64Exp(a);
+    int negative = extractFloat64Sign(a);
+
+    // A NaN is handed to the rounding helper with its sign cleared.
+    if (biasedExp == 0x7FF && sig != 0) negative = 0;
+    if (biasedExp != 0) sig |= UINT64_C(0x0010000000000000);
+    else if (get_denormals_are_zeros(status)) sig = 0;
+    const int rightShift = 0x42C - biasedExp;
+    if (rightShift > 0) sig = shift64RightJamming(sig, rightShift);
+    return roundAndPackInt32(negative, sig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 32-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN or the conversion overflows, the integer indefinite
+| value  is returned.
+*----------------------------------------------------------------------------*/
+
+int32_t float64_to_int32_round_to_zero(float64 a, float_status_t &status)
+{
+    int aSign;
+    int16_t aExp;
+    uint64_t aSig, savedASig;
+    int32_t z;
+    int shiftCount;
+
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    aSign = extractFloat64Sign(a);
+    if (0x41E < aExp) { // exponent too large for int32; also catches inf/NaN (0x7FF)
+        if ((aExp == 0x7FF) && aSig) aSign = 0; // NaN: the sign is cleared
+        goto invalid; // jumps into the overflow branch further down
+    }
+    else if (aExp < 0x3FF) { // small exponent: the truncated result is 0
+        if (get_denormals_are_zeros(status) && aExp == 0) aSig = 0; // a flushed denormal counts as an exact zero
+        if (aExp || aSig) float_raise(status, float_flag_inexact); // any nonzero input is truncated away
+        return 0;
+    }
+    aSig |= UINT64_C(0x0010000000000000); // set bit 52, the implicit leading one
+    shiftCount = 0x433 - aExp; // number of low significand bits to drop
+    savedASig = aSig; // kept to detect lost bits afterwards
+    aSig >>= shiftCount;
+    z = (int32_t) aSig;
+    if (aSign) z = -z;
+    if ((z < 0) ^ aSign) { // sign of z disagrees with the input sign: overflow
+ invalid:
+        float_raise(status, float_flag_invalid);
+        return (int32_t)(int32_indefinite);
+    }
+    if ((aSig<<shiftCount) != savedASig) { // nonzero bits were shifted out
+        float_raise(status, float_flag_inexact);
+    }
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic - which means in particular that the conversion is rounded
+| according to the current rounding mode.  If `a' is a NaN or its exponent
+| is too large for the result to fit, the invalid exception is raised and
+| the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+int64_t float64_to_int64(float64 a, float_status_t &status)
+{
+    uint64_t sig = extractFloat64Frac(a);
+    const int16_t biasedExp = extractFloat64Exp(a);
+    const int negative = extractFloat64Sign(a);
+
+    // Nonzero exponents regain the implicit leading one; a denormal is
+    // optionally flushed to zero.
+    if (biasedExp != 0) sig |= UINT64_C(0x0010000000000000);
+    else if (get_denormals_are_zeros(status)) sig = 0;
+
+    // Exponents above 0x43E (including infinities and NaNs) are rejected
+    // here with the invalid flag and the indefinite value.
+    if (0x43E < biasedExp) {
+        float_raise(status, float_flag_invalid);
+        return (int64_t)(int64_indefinite);
+    }
+    uint64_t sigExtra = 0;
+    const int rightShift = 0x433 - biasedExp;
+    if (rightShift > 0) {
+        shift64ExtraRightJamming(sig, 0, rightShift, &sig, &sigExtra);
+    }
+    else {
+        sig <<= -rightShift;
+    }
+    return roundAndPackInt64(negative, sig, sigExtra, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the 64-bit two's complement integer format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic, except that the conversion is always rounded toward zero.
+| If `a' is a NaN or the conversion overflows, the integer indefinite
+| value  is returned.
+*----------------------------------------------------------------------------*/
+
+int64_t float64_to_int64_round_to_zero(float64 a, float_status_t &status)
+{
+    int aSign;
+    int16_t aExp;
+    uint64_t aSig;
+    int64_t z;
+
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    aSign = extractFloat64Sign(a);
+    if (aExp) aSig |= UINT64_C(0x0010000000000000); // set bit 52, the implicit leading one
+    int shiftCount = aExp - 0x433; // >= 0: shift left, no significand bits are lost
+    if (0 <= shiftCount) {
+        if (0x43E <= aExp) { // exponent too large for int64; also catches inf/NaN
+            if (a != UINT64_C(0xC3E0000000000000)) { // -2^63 is not flagged; NOTE(review): relies on int64_indefinite being INT64_MIN - confirm
+                float_raise(status, float_flag_invalid);
+            }
+            return (int64_t)(int64_indefinite);
+        }
+        z = aSig<<shiftCount;
+    }
+    else {
+        if (aExp < 0x3FE) { // small exponent: the truncated result is 0
+            if (get_denormals_are_zeros(status) && aExp == 0) aSig = 0; // a flushed denormal counts as an exact zero
+            if (aExp | aSig) float_raise(status, float_flag_inexact); // any nonzero input is truncated away
+            return 0;
+        }
+        z = aSig>>(-shiftCount);
+        if ((uint64_t) (aSig<<(shiftCount & 63))) { // the bits dropped by the right shift were nonzero
+             float_raise(status, float_flag_inexact);
+        }
+    }
+    if (aSign) z = -z;
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the single-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float64_to_float32(float64 a, float_status_t &status)
+{
+    uint64_t wideSig = extractFloat64Frac(a);
+    int16_t biasedExp = extractFloat64Exp(a);
+    const int sign = extractFloat64Sign(a);
+
+    if (biasedExp == 0x7FF) {
+        // NaNs go through the common-NaN helpers; infinities keep their sign.
+        if (wideSig == 0) return packFloat32(sign, 0xFF, 0);
+        return commonNaNToFloat32(float64ToCommonNaN(a, status));
+    }
+    if (biasedExp == 0) {
+        // Zero (or a denormal flushed to zero) converts to a signed zero.
+        if (get_denormals_are_zeros(status)) wideSig = 0;
+        if (wideSig == 0) return packFloat32(sign, 0, 0);
+        float_raise(status, float_flag_denormal);
+    }
+
+    // Drop 22 low fraction bits through the jamming right-shift helper.
+    uint32_t narrowSig = (uint32_t) shift64RightJamming(wideSig, 22);
+    if (biasedExp != 0 || narrowSig != 0) {
+        narrowSig |= 0x40000000;
+        biasedExp -= 0x381;
+    }
+
+    return roundAndPackFloat32(sign, biasedExp, narrowSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Rounds the double-precision floating-point value `a' to an integer, and
+| returns the result as a double-precision floating-point value.  The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_round_to_int(float64 a, float_status_t &status)
+{
+    int16_t aExp;
+    uint64_t lastBitMask, roundBitsMask;
+    int roundingMode = get_float_rounding_mode(status);
+    float64 z;
+
+    aExp = extractFloat64Exp(a);
+    if (0x433 <= aExp) { // exponent 0x433 and above: returned unchanged unless it is a NaN
+        if ((aExp == 0x7FF) && extractFloat64Frac(a)) {
+            return propagateFloat64NaN(a, status);
+        }
+        return a;
+    }
+
+    if (get_denormals_are_zeros(status)) {
+        a = float64_denormal_to_zero(a);
+    }
+
+    if (aExp < 0x3FF) { // small exponent: the result is chosen below from +/-0 and +/-1
+        if ((uint64_t) (a<<1) == 0) return a; // +0 or -0 is returned as is, without raising inexact
+        float_raise(status, float_flag_inexact);
+        int aSign = extractFloat64Sign(a);
+        switch (roundingMode) {
+         case float_round_nearest_even:
+            if ((aExp == 0x3FE) && extractFloat64Frac(a)) { // exponent 0x3FE with a nonzero fraction rounds up in magnitude
+              return packFloat64(aSign, 0x3FF, 0);
+            }
+            break;
+         case float_round_down:
+            return aSign ? UINT64_C(0xBFF0000000000000) : 0; // negative input -> -1.0 encoding, positive -> +0
+         case float_round_up:
+            return
+              aSign ? UINT64_C(0x8000000000000000) : UINT64_C(0x3FF0000000000000); // negative input -> -0, positive -> +1.0 encoding
+        }
+        return packFloat64(aSign, 0, 0); // all remaining cases give a zero with the sign of a
+    }
+    lastBitMask = 1;
+    lastBitMask <<= 0x433 - aExp; // encoding bit at position 0x433 - aExp
+    roundBitsMask = lastBitMask - 1; // all encoding bits below lastBitMask
+    z = a;
+    if (roundingMode == float_round_nearest_even) {
+        z += lastBitMask>>1; // add half of the last kept bit directly to the encoding
+        if ((z & roundBitsMask) == 0) z &= ~lastBitMask; // exact tie: clear the last kept bit
+    }
+    else if (roundingMode != float_round_to_zero) {
+        if (extractFloat64Sign(z) ^ (roundingMode == float_round_up)) { // true for negative/round-down and positive/round-up
+            z += roundBitsMask;
+        }
+    }
+    z &= ~roundBitsMask; // clear every bit below lastBitMask
+    if (z != a) float_raise(status, float_flag_inexact);
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Extracts the fractional portion of double-precision floating-point value `a',
+| and returns the result  as a  double-precision  floating-point value. The
+| fractional results are precise. The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_frc(float64 a, float_status_t &status)
+{
+    int roundingMode = get_float_rounding_mode(status); // used below only to choose the sign of zero results
+
+    uint64_t aSig = extractFloat64Frac(a);
+    int16_t aExp = extractFloat64Exp(a);
+    int aSign = extractFloat64Sign(a);
+
+    if (aExp == 0x7FF) { // infinity or NaN
+        if (aSig) return propagateFloat64NaN(a, status);
+        float_raise(status, float_flag_invalid); // infinity: invalid, default NaN returned
+        return float64_default_nan;
+    }
+
+    if (aExp >= 0x433) { // exponent 0x433 and above: the result is a zero
+        return packFloat64(roundingMode == float_round_down, 0, 0); // negative zero only in round-down mode
+    }
+
+    if (aExp < 0x3FF) { // small exponent: a itself is returned, apart from the denormal handling
+        if (aExp == 0) {
+            if (get_denormals_are_zeros(status)) aSig = 0;
+            if (aSig == 0) {
+                return packFloat64(roundingMode == float_round_down, 0, 0);
+            }
+
+            float_raise(status, float_flag_denormal);
+            if (! float_exception_masked(status, float_flag_underflow)) // underflow is raised here only when it is unmasked
+                float_raise(status, float_flag_underflow);
+
+            if(get_flush_underflow_to_zero(status)) { // flush mode: return a zero with the sign of a
+                float_raise(status, float_flag_underflow | float_flag_inexact);
+                return packFloat64(aSign, 0, 0);
+            }
+        }
+        return a;
+    }
+
+    uint64_t lastBitMask = UINT64_C(1) << (0x433 - aExp); // fraction bit at position 0x433 - aExp
+    uint64_t roundBitsMask = lastBitMask - 1; // all fraction bits below lastBitMask
+
+    aSig &= roundBitsMask; // keep only the bits below lastBitMask
+    aSig <<= 10; // NOTE(review): presumably pre-positions the value for normalizeRoundAndPackFloat64 - confirm
+    aExp--;
+
+    if (aSig == 0) // none of the kept bits are set: the result is a zero
+       return packFloat64(roundingMode == float_round_down, 0, 0);
+
+    return normalizeRoundAndPackFloat64(aSign, aExp, aSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the double-precision
+| floating-point values `a' and `b'.  If `zSign' is 1, the sum is negated
+| before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 addFloat64Sigs(float64 a, float64 b, int zSign, float_status_t &status)
+{
+    int16_t aExp, bExp, zExp;
+    uint64_t aSig, bSig, zSig;
+    int16_t expDiff;
+
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    bSig = extractFloat64Frac(b);
+    bExp = extractFloat64Exp(b);
+
+    if (get_denormals_are_zeros(status)) { // denormal inputs are treated as having a zero fraction
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+    }
+
+    expDiff = aExp - bExp;
+    aSig <<= 9; // bit 61 (0x2000...) is now the implicit-one position used below
+    bSig <<= 9;
+    if (0 < expDiff) { // a has the larger exponent: align b to it
+        if (aExp == 0x7FF) {
+            if (aSig) return propagateFloat64NaN(a, b, status);
+            if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+            return a; // infinite a with a smaller-exponent b: a is returned unchanged
+        }
+        if ((aExp == 0) && aSig)
+            float_raise(status, float_flag_denormal);
+
+        if (bExp == 0) {
+            if (bSig) float_raise(status, float_flag_denormal);
+            --expDiff; // no implicit one is added for exponent 0, so shift one place less
+        }
+        else bSig |= UINT64_C(0x2000000000000000); // implicit leading one of b
+
+        bSig = shift64RightJamming(bSig, expDiff);
+        zExp = aExp;
+    }
+    else if (expDiff < 0) { // mirror case: b has the larger exponent
+        if (bExp == 0x7FF) {
+            if (bSig) return propagateFloat64NaN(a, b, status);
+            if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+            return packFloat64(zSign, 0x7FF, 0); // infinity with the requested result sign
+        }
+        if ((bExp == 0) && bSig)
+            float_raise(status, float_flag_denormal);
+
+        if (aExp == 0) {
+            if (aSig) float_raise(status, float_flag_denormal);
+            ++expDiff; // expDiff is negative here: shift one place less
+        }
+        else aSig |= UINT64_C(0x2000000000000000); // implicit leading one of a
+
+        aSig = shift64RightJamming(aSig, -expDiff);
+        zExp = bExp;
+    }
+    else { // equal exponents
+        if (aExp == 0x7FF) {
+            if (aSig | bSig) return propagateFloat64NaN(a, b, status);
+            return a; // both operands are infinities
+        }
+        if (aExp == 0) { // both operands are zero or denormal: add the raw fractions
+            zSig = (aSig + bSig) >> 9;
+            if (aSig | bSig) {
+                float_raise(status, float_flag_denormal);
+                if (get_flush_underflow_to_zero(status)) {
+                    float_raise(status, float_flag_underflow | float_flag_inexact);
+                    return packFloat64(zSign, 0, 0);
+                }
+                if (! float_exception_masked(status, float_flag_underflow)) {
+                    if (extractFloat64Frac(zSig) == zSig) // the sum still fits in the fraction field
+                        float_raise(status, float_flag_underflow);
+                }
+            }
+            return packFloat64(zSign, 0, zSig);
+        }
+        zSig = UINT64_C(0x4000000000000000) + aSig + bSig; // 0x4000... is two implicit ones (2 * 0x2000...)
+        return roundAndPackFloat64(zSign, aExp, zSig, status);
+    }
+    aSig |= UINT64_C(0x2000000000000000); // implicit one of the unshifted operand; only the sum aSig + bSig is used below
+    zSig = (aSig + bSig)<<1;
+    --zExp;
+    if ((int64_t) zSig < 0) { // top bit set after doubling: use the unshifted sum and restore zExp
+        zSig = aSig + bSig;
+        ++zExp;
+    }
+    return roundAndPackFloat64(zSign, zExp, zSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the double-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float64 subFloat64Sigs(float64 a, float64 b, int zSign, float_status_t &status)
+{
+    int16_t aExp, bExp, zExp;
+    uint64_t aSig, bSig, zSig;
+    int16_t expDiff;
+
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    bSig = extractFloat64Frac(b);
+    bExp = extractFloat64Exp(b);
+
+    if (get_denormals_are_zeros(status)) { // denormal inputs are treated as having a zero fraction
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+    }
+
+    expDiff = aExp - bExp;
+    aSig <<= 10; // bit 62 (0x4000...) is now the implicit-one position used below
+    bSig <<= 10;
+    if (0 < expDiff) goto aExpBigger;
+    if (expDiff < 0) goto bExpBigger;
+    if (aExp == 0x7FF) { // equal exponents, both operands infinity or NaN
+        if (aSig | bSig) return propagateFloat64NaN(a, b, status);
+        float_raise(status, float_flag_invalid); // two infinities: invalid, default NaN returned
+        return float64_default_nan;
+    }
+    if (aExp == 0) { // both operands are zero or denormal
+        if (aSig | bSig) float_raise(status, float_flag_denormal);
+        aExp = 1; // continue with exponent 1 and no implicit one set
+        bExp = 1;
+    }
+    if (bSig < aSig) goto aBigger;
+    if (aSig < bSig) goto bBigger;
+    return packFloat64(get_float_rounding_mode(status) == float_round_down, 0, 0); // equal magnitudes: zero, negative only in round-down mode
+ bExpBigger:
+    if (bExp == 0x7FF) {
+        if (bSig) return propagateFloat64NaN(a, b, status);
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloat64(zSign ^ 1, 0x7FF, 0); // infinity with the opposite of zSign
+    }
+    if ((bExp == 0) && bSig)
+        float_raise(status, float_flag_denormal);
+
+    if (aExp == 0) {
+        if (aSig) float_raise(status, float_flag_denormal);
+        ++expDiff; // expDiff is negative here: shift one place less
+    }
+    else aSig |= UINT64_C(0x4000000000000000); // implicit leading one of a
+
+    aSig = shift64RightJamming(aSig, -expDiff);
+    bSig |= UINT64_C(0x4000000000000000); // implicit leading one of b
+ bBigger:
+    zSig = bSig - aSig;
+    zExp = bExp;
+    zSign ^= 1; // b is the larger magnitude, so the result sign flips
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if (aExp == 0x7FF) {
+        if (aSig) return propagateFloat64NaN(a, b, status);
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        return a; // infinite a with a smaller-exponent b: a is returned unchanged
+    }
+    if ((aExp == 0) && aSig)
+        float_raise(status, float_flag_denormal);
+
+    if (bExp == 0) {
+        if (bSig) float_raise(status, float_flag_denormal);
+        --expDiff; // no implicit one is added for exponent 0, so shift one place less
+    }
+    else bSig |= UINT64_C(0x4000000000000000); // implicit leading one of b
+
+    bSig = shift64RightJamming(bSig, expDiff);
+    aSig |= UINT64_C(0x4000000000000000); // implicit leading one of a
+ aBigger:
+    zSig = aSig - bSig;
+    zExp = aExp;
+ normalizeRoundAndPack:
+    --zExp; // NOTE(review): presumably matches the input convention of normalizeRoundAndPackFloat64 - confirm
+    return normalizeRoundAndPackFloat64(zSign, zExp, zSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the double-precision floating-point values `a'
+| and `b'.  The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_add(float64 a, float64 b, float_status_t &status)
+{
+    const int signA = extractFloat64Sign(a);
+    const int signB = extractFloat64Sign(b);
+
+    // Operands of opposite sign turn the sum into a difference of
+    // magnitudes; both helpers receive the sign of `a'.
+    if (signA != signB)
+        return subFloat64Sigs(a, b, signA, status);
+
+    return addFloat64Sigs(a, b, signA, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the double-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_sub(float64 a, float64 b, float_status_t &status)
+{
+    const int signA = extractFloat64Sign(a);
+    const int signB = extractFloat64Sign(b);
+
+    // Operands of opposite sign turn the difference into a sum of
+    // magnitudes; both helpers receive the sign of `a'.
+    if (signA != signB)
+        return addFloat64Sigs(a, b, signA, status);
+
+    return subFloat64Sigs(a, b, signA, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the double-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_mul(float64 a, float64 b, float_status_t &status)
+{
+    int aSign, bSign, zSign;
+    int16_t aExp, bExp, zExp;
+    uint64_t aSig, bSig, zSig0, zSig1;
+
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    aSign = extractFloat64Sign(a);
+    bSig = extractFloat64Frac(b);
+    bExp = extractFloat64Exp(b);
+    bSign = extractFloat64Sign(b);
+    zSign = aSign ^ bSign; // the product is negative when the operand signs differ
+
+    if (get_denormals_are_zeros(status)) { // denormal inputs are treated as having a zero fraction
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+    }
+
+    if (aExp == 0x7FF) { // a is infinity or NaN
+        if (aSig || ((bExp == 0x7FF) && bSig)) {
+            return propagateFloat64NaN(a, b, status);
+        }
+        if ((bExp | bSig) == 0) { // infinity times zero: invalid
+            float_raise(status, float_flag_invalid);
+            return float64_default_nan;
+        }
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloat64(zSign, 0x7FF, 0);
+    }
+    if (bExp == 0x7FF) { // b is infinity or NaN, a is finite
+        if (bSig) return propagateFloat64NaN(a, b, status);
+        if ((aExp | aSig) == 0) { // zero times infinity: invalid
+            float_raise(status, float_flag_invalid);
+            return float64_default_nan;
+        }
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloat64(zSign, 0x7FF, 0);
+    }
+    if (aExp == 0) {
+        if (aSig == 0) { // a is zero: signed zero result
+            if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+            return packFloat64(zSign, 0, 0);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloat64Subnormal(aSig, &aExp, &aSig);
+    }
+    if (bExp == 0) {
+        if (bSig == 0) return packFloat64(zSign, 0, 0); // b is zero: signed zero result
+        float_raise(status, float_flag_denormal);
+        normalizeFloat64Subnormal(bSig, &bExp, &bSig);
+    }
+    zExp = aExp + bExp - 0x3FF; // sum of the biased exponents minus 0x3FF
+    aSig = (aSig | UINT64_C(0x0010000000000000))<<10; // set bit 52 (implicit one), then move it up to bit 62
+    bSig = (bSig | UINT64_C(0x0010000000000000))<<11; // set bit 52 (implicit one), then move it up to bit 63
+    mul64To128(aSig, bSig, &zSig0, &zSig1);
+    zSig0 |= (zSig1 != 0); // fold the low product word into a sticky bit
+    if (0 <= (int64_t) (zSig0<<1)) { // bit 62 of zSig0 is clear: shift left by one and adjust zExp
+        zSig0 <<= 1;
+        --zExp;
+    }
+    return roundAndPackFloat64(zSign, zExp, zSig0, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the double-precision floating-point value `a'
+| by the corresponding value `b'.  The operation is performed according to
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_div(float64 a, float64 b, float_status_t &status)
+{
+    int aSign, bSign, zSign;
+    int16_t aExp, bExp, zExp;
+    uint64_t aSig, bSig, zSig;
+    uint64_t rem0, rem1;
+    uint64_t term0, term1;
+
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    aSign = extractFloat64Sign(a);
+    bSig = extractFloat64Frac(b);
+    bExp = extractFloat64Exp(b);
+    bSign = extractFloat64Sign(b);
+    zSign = aSign ^ bSign; // the quotient is negative when the operand signs differ
+
+    if (get_denormals_are_zeros(status)) { // denormal inputs are treated as having a zero fraction
+        if (aExp == 0) aSig = 0;
+        if (bExp == 0) bSig = 0;
+    }
+
+    if (aExp == 0x7FF) { // a is infinity or NaN
+        if (aSig) return propagateFloat64NaN(a, b, status);
+        if (bExp == 0x7FF) {
+            if (bSig) return propagateFloat64NaN(a, b, status);
+            float_raise(status, float_flag_invalid); // infinity divided by infinity: invalid
+            return float64_default_nan;
+        }
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloat64(zSign, 0x7FF, 0);
+    }
+    if (bExp == 0x7FF) { // b is infinity or NaN, a is finite
+        if (bSig) return propagateFloat64NaN(a, b, status);
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloat64(zSign, 0, 0); // finite divided by infinity: signed zero
+    }
+    if (bExp == 0) {
+        if (bSig == 0) { // b is zero
+            if ((aExp | aSig) == 0) { // zero divided by zero: invalid
+                float_raise(status, float_flag_invalid);
+                return float64_default_nan;
+            }
+            float_raise(status, float_flag_divbyzero);
+            return packFloat64(zSign, 0x7FF, 0); // nonzero divided by zero: signed infinity
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloat64Subnormal(bSig, &bExp, &bSig);
+    }
+    if (aExp == 0) {
+        if (aSig == 0) return packFloat64(zSign, 0, 0); // a is zero: signed zero result
+        float_raise(status, float_flag_denormal);
+        normalizeFloat64Subnormal(aSig, &aExp, &aSig);
+    }
+    zExp = aExp - bExp + 0x3FD;
+    aSig = (aSig | UINT64_C(0x0010000000000000))<<10; // set bit 52 (implicit one), then move it up to bit 62
+    bSig = (bSig | UINT64_C(0x0010000000000000))<<11; // set bit 52 (implicit one), then move it up to bit 63
+    if (bSig <= (aSig + aSig)) { // halve the dividend and bump the exponent so that 2*aSig < bSig
+        aSig >>= 1;
+        ++zExp;
+    }
+    zSig = estimateDiv128To64(aSig, 0, bSig);
+    if ((zSig & 0x1FF) <= 2) { // only when the low 9 bits are <= 2 is the estimate corrected with an exact remainder
+        mul64To128(bSig, zSig, &term0, &term1);
+        sub128(aSig, 0, term0, term1, &rem0, &rem1);
+        while ((int64_t) rem0 < 0) { // negative remainder: the estimate was too large
+            --zSig;
+            add128(rem0, rem1, 0, bSig, &rem0, &rem1);
+        }
+        zSig |= (rem1 != 0); // sticky bit for a nonzero remainder
+    }
+    return roundAndPackFloat64(zSign, zExp, zSig, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the double-precision floating-point value `a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_sqrt(float64 a, float_status_t &status)
+{
+    int aSign;
+    int16_t aExp, zExp;
+    uint64_t aSig, zSig, doubleZSig;
+    uint64_t rem0, rem1, term0, term1;
+
+    aSig = extractFloat64Frac(a);
+    aExp = extractFloat64Exp(a);
+    aSign = extractFloat64Sign(a);
+
+    if (aExp == 0x7FF) { // infinity or NaN
+        if (aSig) return propagateFloat64NaN(a, status);
+        if (! aSign) return a; // +infinity is returned unchanged
+        float_raise(status, float_flag_invalid); // -infinity: invalid
+        return float64_default_nan;
+    }
+
+    if (get_denormals_are_zeros(status)) {
+        if (aExp == 0) aSig = 0;
+    }
+
+    if (aSign) {
+        if ((aExp | aSig) == 0) return packFloat64(aSign, 0, 0); // -0 yields -0
+        float_raise(status, float_flag_invalid); // any other negative input: invalid
+        return float64_default_nan;
+    }
+    if (aExp == 0) {
+        if (aSig == 0) return 0; // +0 yields +0 (all-zero encoding)
+        float_raise(status, float_flag_denormal);
+        normalizeFloat64Subnormal(aSig, &aExp, &aSig);
+    }
+    zExp = ((aExp - 0x3FF)>>1) + 0x3FE; // halve the unbiased exponent, then re-bias
+    aSig |= UINT64_C(0x0010000000000000); // set bit 52, the implicit leading one
+    zSig = estimateSqrt32(aExp, (uint32_t)(aSig>>21)); // 32-bit estimate from the top significand bits
+    aSig <<= 9 - (aExp & 1); // shift by 9 or 8 depending on the exponent parity
+    zSig = estimateDiv128To64(aSig, 0, zSig<<32) + (zSig<<30); // refine the estimate to 64 bits
+    if ((zSig & 0x1FF) <= 5) { // only when the low 9 bits are <= 5 is the estimate corrected with an exact remainder
+        doubleZSig = zSig<<1;
+        mul64To128(zSig, zSig, &term0, &term1);
+        sub128(aSig, 0, term0, term1, &rem0, &rem1);
+        while ((int64_t) rem0 < 0) { // negative remainder: the estimate was too large
+            --zSig;
+            doubleZSig -= 2;
+            add128(rem0, rem1, zSig>>63, doubleZSig | 1, &rem0, &rem1);
+        }
+        zSig |= ((rem0 | rem1) != 0); // sticky bit for a nonzero remainder
+    }
+    return roundAndPackFloat64(0, zExp, zSig, status); // the result sign is always 0 on this path
+}
+
+/*----------------------------------------------------------------------------
+| Determine double-precision floating-point number class
+*----------------------------------------------------------------------------*/
+
+float_class_t float64_class(float64 a)
+{
+    // Split the encoding into biased exponent, fraction and sign fields.
+    const int16_t biasedExp = extractFloat64Exp(a);
+    const uint64_t fraction = extractFloat64Frac(a);
+    const int negative = extractFloat64Sign(a);
+
+    switch (biasedExp) {
+    case 0x7FF:
+        // All-ones exponent: NaN unless the fraction is clear (infinity).
+        if (fraction != 0) return float_NaN;
+        return negative ? float_negative_inf : float_positive_inf;
+    case 0:
+        // All-zeros exponent: zero or denormal, decided by the fraction.
+        if (fraction == 0) return float_zero;
+        return float_denormal;
+    default:
+        // Every other exponent value is reported as normalized.
+        return float_normalized;
+    }
+}
+
+/*----------------------------------------------------------------------------
+| Compare  between  two  double  precision  floating  point  numbers. Returns
+| 'float_relation_equal'  if the operands are equal, 'float_relation_less' if
+| the    value    'a'   is   less   than   the   corresponding   value   `b',
+| 'float_relation_greater' if the value 'a' is greater than the corresponding
+| value `b', or 'float_relation_unordered' otherwise.
+*----------------------------------------------------------------------------*/
+
+int float64_compare(float64 a, float64 b, float_status_t &status)
+{
+    if (get_denormals_are_zeros(status)) {
+        a = float64_denormal_to_zero(a);
+        b = float64_denormal_to_zero(b);
+    }
+
+    const float_class_t classA = float64_class(a);
+    const float_class_t classB = float64_class(b);
+
+    // Any NaN operand, quiet or signaling, makes the comparison invalid.
+    if (classA == float_NaN || classB == float_NaN) {
+        float_raise(status, float_flag_invalid);
+        return float_relation_unordered;
+    }
+    if (classA == float_denormal || classB == float_denormal)
+        float_raise(status, float_flag_denormal);
+
+    // Identical encodings are equal, and so are +0 and -0 (sign shifted out).
+    const bool bothZero = ((uint64_t) ((a | b)<<1) == 0);
+    if ((a == b) || bothZero) return float_relation_equal;
+
+    const int signA = extractFloat64Sign(a);
+    const int signB = extractFloat64Sign(b);
+    if (signA != signB)
+        return (signA) ? float_relation_less : float_relation_greater;
+
+    // Same sign: raw ordering holds for positives and flips for negatives.
+    return (signA ^ (a < b)) ? float_relation_less : float_relation_greater;
+}
+
+/*----------------------------------------------------------------------------
+| Compare  between  two  double  precision  floating  point  numbers. Returns
+| 'float_relation_equal'  if the operands are equal, 'float_relation_less' if
+| the    value    'a'   is   less   than   the   corresponding   value   `b',
+| 'float_relation_greater' if the value 'a' is greater than the corresponding
+| value `b', or 'float_relation_unordered' otherwise. Quiet NaNs do not cause
+| an exception.
+*----------------------------------------------------------------------------*/
+
+int float64_compare_quiet(float64 a, float64 b, float_status_t &status)
+{
+    if (get_denormals_are_zeros(status)) {
+        a = float64_denormal_to_zero(a);
+        b = float64_denormal_to_zero(b);
+    }
+
+    const float_class_t classA = float64_class(a);
+    const float_class_t classB = float64_class(b);
+
+    // Any NaN operand makes the result unordered, but only a signaling
+    // NaN raises the invalid flag.
+    if (classA == float_NaN || classB == float_NaN) {
+        if (float64_is_signaling_nan(a) || float64_is_signaling_nan(b))
+            float_raise(status, float_flag_invalid);
+        return float_relation_unordered;
+    }
+
+    if (classA == float_denormal || classB == float_denormal)
+        float_raise(status, float_flag_denormal);
+
+    // Identical encodings, or +0 against -0, compare equal.
+    const bool bothZero = ((uint64_t) ((a | b)<<1) == 0);
+    if (bothZero || a == b) return float_relation_equal;
+
+    const int signA = extractFloat64Sign(a);
+    const int signB = extractFloat64Sign(b);
+    if (signA != signB)
+        return signA ? float_relation_less : float_relation_greater;
+
+    // Same sign: the raw integer ordering is reversed for negatives.
+    const bool isLess = (signA ^ (a < b)) != 0;
+    return isLess ? float_relation_less : float_relation_greater;
+}
+
+/*----------------------------------------------------------------------------
+| Compare between two double precision floating point numbers and return the
+| smaller  of  them.  The operation  is performed  according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_min(float64 a, float64 b, float_status_t &status)
+{
+    const bool flushDenormals = get_denormals_are_zeros(status);
+    if (flushDenormals) a = float64_denormal_to_zero(a);
+    if (flushDenormals) b = float64_denormal_to_zero(b);
+    // `b' wins unless `a' is strictly less, i.e. also when equal or unordered.
+    const int relation = float64_compare(a, b, status);
+    return (relation == float_relation_less) ? a : b;
+}
+
+/*----------------------------------------------------------------------------
+| Compare between two double precision floating point numbers and return the
+| larger  of  them.  The operation  is performed  according to  the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 float64_max(float64 a, float64 b, float_status_t &status)
+{
+    const bool flushDenormals = get_denormals_are_zeros(status);
+    if (flushDenormals) a = float64_denormal_to_zero(a);
+    if (flushDenormals) b = float64_denormal_to_zero(b);
+    // `b' wins unless `a' is strictly greater, i.e. also when equal or unordered.
+    const int relation = float64_compare(a, b, status);
+    return (relation == float_relation_greater) ? a : b;
+}
+
+#ifdef FLOATX80
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 32-bit two's complement integer `a'
+| to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 int32_to_floatx80(int32_t a)
+{
+    if (a == 0) return packFloatx80(0, 0, 0);
+    const int zSign = (a < 0);
+    // Negate in unsigned arithmetic: signed `-a' is undefined for INT32_MIN.
+    const uint32_t absA = zSign ? (uint32_t) (0u - (uint32_t) a) : (uint32_t) a;
+    const int shiftCount = countLeadingZeros32(absA) + 32;  // leading one -> bit 63
+    return packFloatx80(zSign, 0x403E - shiftCount, ((uint64_t) absA)<<shiftCount);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a'
+| to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 int64_to_floatx80(int64_t a)
+{
+    if (a == 0) return packFloatx80(0, 0, 0);
+    const int zSign = (a < 0);
+    const uint64_t absA = zSign ? (0 - (uint64_t) a) : (uint64_t) a;  // unsigned negate: -INT64_MIN is UB
+    const int shiftCount = countLeadingZeros64(absA);  // leading one -> bit 63
+    return packFloatx80(zSign, 0x403E - shiftCount, absA<<shiftCount);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float32_to_floatx80(float32 a, float_status_t &status)
+{
+    const int sign = extractFloat32Sign(a);
+    int16_t exponent = extractFloat32Exp(a);
+    uint32_t fraction = extractFloat32Frac(a);
+    if (exponent == 0xFF)               // infinity or NaN
+        return fraction ? commonNaNToFloatx80(float32ToCommonNaN(a, status))
+                        : packFloatx80(sign, 0x7FFF, UINT64_C(0x8000000000000000));
+    if (exponent == 0) {                // zero or denormal
+        if (fraction == 0) return packFloatx80(sign, 0, 0);
+        float_raise(status, float_flag_denormal);
+        normalizeFloat32Subnormal(fraction, &exponent, &fraction);
+    }
+    // Set the integer bit, left-align in 64 bits and re-bias the exponent.
+    const uint64_t sig = ((uint64_t) (fraction | 0x00800000))<<40;
+    return packFloatx80(sign, exponent + 0x3F80, sig);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the double-precision floating-point value
+| `a' to the extended double-precision floating-point format.  The conversion
+| is performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float64_to_floatx80(float64 a, float_status_t &status)
+{
+    const int sign = extractFloat64Sign(a);
+    int16_t exponent = extractFloat64Exp(a);
+    uint64_t fraction = extractFloat64Frac(a);
+
+    if (exponent == 0x7FF)              // infinity or NaN
+        return fraction ? commonNaNToFloatx80(float64ToCommonNaN(a, status))
+                        : packFloatx80(sign, 0x7FFF, UINT64_C(0x8000000000000000));
+
+    if (exponent == 0) {                // zero or denormal
+        if (fraction == 0) return packFloatx80(sign, 0, 0);
+        float_raise(status, float_flag_denormal);
+        normalizeFloat64Subnormal(fraction, &exponent, &fraction);
+    }
+    // Set the integer bit, left-align in 64 bits and re-bias the exponent.
+    const uint64_t sig = (fraction | UINT64_C(0x0010000000000000))<<11;
+    return packFloatx80(sign, exponent + 0x3C00, sig);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 32-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic - which means in particular that the conversion
+| is rounded according to the current rounding mode. If `a' is a NaN or the
+| conversion overflows, the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+int32_t floatx80_to_int32(floatx80 a, float_status_t &status)
+{
+    // Unsupported encodings raise invalid and give the integer indefinite.
+    if (floatx80_is_unsupported(a)) {
+        float_raise(status, float_flag_invalid);
+        return int32_indefinite;
+    }
+
+    const int32_t exponent = extractFloatx80Exp(a);
+    const uint64_t significand = extractFloatx80Frac(a);
+
+    // The sign of a NaN is discarded before rounding.
+    const bool isNaN = (exponent == 0x7FFF) && (uint64_t) (significand<<1);
+    const int sign = isNaN ? 0 : extractFloatx80Sign(a);
+
+    // Jamming right shift by 0x4037 - exponent, but by at least one bit.
+    const int shift = (0x4037 - exponent <= 0) ? 1 : 0x4037 - exponent;
+    return roundAndPackInt32(sign, shift64RightJamming(significand, shift), status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 32-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero.  If `a' is a NaN or the conversion overflows, the integer
+| indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+int32_t floatx80_to_int32_round_to_zero(floatx80 a, float_status_t &status)
+{
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a)) {
+        float_raise(status, float_flag_invalid);
+        return int32_indefinite;
+    }
+
+    const uint64_t aSig = extractFloatx80Frac(a);
+    const int32_t aExp = extractFloatx80Exp(a);
+    const int aSign = extractFloatx80Sign(a);
+
+    // Magnitude of 2^32 or more, infinity or NaN: not representable.
+    if (aExp > 0x401E) {
+        float_raise(status, float_flag_invalid);
+        return (int32_t)(int32_indefinite);
+    }
+    // Magnitude below 1 truncates to 0; inexact unless the input is zero.
+    if (aExp < 0x3FFF) {
+        if (aExp || aSig) float_raise(status, float_flag_inexact);
+        return 0;
+    }
+
+    // 32 <= shiftCount <= 63 here, so the integer part fits in 32 bits.
+    const int shiftCount = 0x403E - aExp;
+    const uint32_t magnitude = (uint32_t) (aSig>>shiftCount);
+
+    // Negate in unsigned arithmetic: negating a signed INT32_MIN is
+    // undefined behaviour, yet -2^31 is a legitimate result.
+    const int32_t z = (int32_t) (aSign ? (uint32_t) (0u - magnitude) : magnitude);
+    // A result whose sign disagrees with the operand has overflowed.
+    if ((z < 0) ^ aSign) {
+        float_raise(status, float_flag_invalid);
+        return (int32_t)(int32_indefinite);
+    }
+    if ((((uint64_t) magnitude)<<shiftCount) != aSig)
+        float_raise(status, float_flag_inexact);
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic - which means in particular that the conversion
+| is rounded according to the current rounding mode. If `a' is a NaN or the
+| conversion overflows, the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+int64_t floatx80_to_int64(floatx80 a, float_status_t &status)
+{
+    // Unsupported encodings raise invalid and give the integer indefinite.
+    if (floatx80_is_unsupported(a)) {
+        float_raise(status, float_flag_invalid);
+        return int64_indefinite;
+    }
+
+    const int sign = extractFloatx80Sign(a);
+    const int32_t exponent = extractFloatx80Exp(a);
+    uint64_t sigHigh = extractFloatx80Frac(a);
+    uint64_t sigLow = 0;
+
+    // How far the significand has to move right to leave only the
+    // integer part in sigHigh.
+    const int shift = 0x403E - exponent;
+
+    // A negative shift means a magnitude of 2^64 or more (infinities and
+    // NaNs included): invalid.
+    if (shift < 0) {
+        float_raise(status, float_flag_invalid);
+        return (int64_t)(int64_indefinite);
+    }
+
+    // A zero shift leaves sigLow at 0; otherwise the bits shifted out of
+    // sigHigh are collected in sigLow for the rounding step.
+    if (shift > 0) {
+        shift64ExtraRightJamming(sigHigh, 0, shift, &sigHigh, &sigLow);
+    }
+
+    return roundAndPackInt64(sign, sigHigh, sigLow, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 64-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero.  If `a' is a NaN or the conversion overflows, the integer
+| indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+int64_t floatx80_to_int64_round_to_zero(floatx80 a, float_status_t &status)
+{
+    // Unsupported encodings raise invalid and give the integer indefinite.
+    if (floatx80_is_unsupported(a)) {
+        float_raise(status, float_flag_invalid);
+        return int64_indefinite;
+    }
+
+    const uint64_t significand = extractFloatx80Frac(a);
+    const int32_t exponent = extractFloatx80Exp(a);
+    const int sign = extractFloatx80Sign(a);
+
+    // Magnitude of 2^63 or more, infinity or NaN: always the indefinite value.
+    if (exponent >= 0x403E) {
+        // Invalid is raised unless a.exp is 0xC03E (presumably sign bit
+        // plus exponent 0x403E) and no bit below the integer bit is set,
+        // i.e. unless the operand is exactly -2^63.
+        const uint64_t fractionBits = significand & UINT64_C(0x7FFFFFFFFFFFFFFF);
+        if ((a.exp != 0xC03E) || fractionBits)
+            float_raise(status, float_flag_invalid);
+        return (int64_t)(int64_indefinite);
+    }
+
+    // Magnitude below 1 truncates to zero; inexact unless the input is zero.
+    if (exponent < 0x3FFF) {
+        if (exponent | significand) float_raise(status, float_flag_inexact);
+        return 0;
+    }
+
+    // 1 <= rightShift <= 63: drop the fraction bits, flagging any that are set.
+    const int rightShift = 0x403E - exponent;
+    const int64_t magnitude = significand>>rightShift;
+    if ((uint64_t) (significand<<(64 - rightShift)))
+        float_raise(status, float_flag_inexact);
+    return sign ? -magnitude : magnitude;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the single-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 floatx80_to_float32(floatx80 a, float_status_t &status)
+{
+    // Unsupported encodings yield the default NaN and raise invalid.
+    if (floatx80_is_unsupported(a)) {
+        float_raise(status, float_flag_invalid);
+        return float32_default_nan;
+    }
+
+    const int sign = extractFloatx80Sign(a);
+    int32_t exponent = extractFloatx80Exp(a);
+    const uint64_t significand = extractFloatx80Frac(a);
+
+    if (exponent == 0x7FFF) {
+        // Bits below the integer bit distinguish a NaN from an infinity.
+        const bool isNaN = ((uint64_t) (significand<<1) != 0);
+        if (!isNaN) return packFloat32(sign, 0xFF, 0);
+        return commonNaNToFloat32(floatx80ToCommonNaN(a, status));
+    }
+    // Narrow the significand with a 33-bit jamming right shift.
+    const uint64_t narrowed = shift64RightJamming(significand, 33);
+    if (exponent || narrowed) exponent -= 0x3F81;   // re-bias unless zero
+    return roundAndPackFloat32(sign, exponent, (uint32_t) narrowed, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the double-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float64 floatx80_to_float64(floatx80 a, float_status_t &status)
+{
+    // Unsupported encodings yield the default NaN and raise invalid.
+    if (floatx80_is_unsupported(a)) {
+        float_raise(status, float_flag_invalid);
+        return float64_default_nan;
+    }
+
+    const int sign = extractFloatx80Sign(a);
+    int32_t exponent = extractFloatx80Exp(a);
+    const uint64_t significand = extractFloatx80Frac(a);
+
+    if (exponent == 0x7FFF) {
+        // Bits below the integer bit distinguish a NaN from an infinity.
+        const bool isNaN = ((uint64_t) (significand<<1) != 0);
+        if (!isNaN) return packFloat64(sign, 0x7FF, 0);
+        return commonNaNToFloat64(floatx80ToCommonNaN(a, status));
+    }
+
+    // Jamming right shift by one bit ahead of rounding to double precision.
+    const uint64_t narrowed = shift64RightJamming(significand, 1);
+
+    // A true zero keeps exponent 0; everything else is re-biased.
+    if (exponent || significand) exponent -= 0x3C01;
+    return roundAndPackFloat64(sign, exponent, narrowed, status);
+}
+
+/*----------------------------------------------------------------------------
+| Rounds the extended double-precision floating-point value `a' to an integer,
+| and returns the result as an extended double-precision floating-point
+| value.  The operation is performed according to the IEC/IEEE Standard for
+| Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_round_to_int(floatx80 a, float_status_t &status)
+{ // Rounds `a' to an integral value in floatx80 format, using the rounding mode held in status.
+    int aSign;
+    uint64_t lastBitMask, roundBitsMask;
+    int roundingMode = get_float_rounding_mode(status); // rounding direction is taken from status
+    floatx80 z;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a))
+    {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    int32_t aExp = extractFloatx80Exp(a);
+    uint64_t aSig = extractFloatx80Frac(a);
+    if (0x403E <= aExp) { // exponent this large: returned unchanged unless it is a NaN
+        if ((aExp == 0x7FFF) && (uint64_t) (aSig<<1)) {
+            return propagateFloatx80NaN(a, status);
+        }
+        return a;
+    }
+    if (aExp < 0x3FFF) { // small exponent: result is a signed zero or +/-1 (exp 0x3FFF, integer bit only)
+        if (aExp == 0) {
+            if ((aSig<<1) == 0) return a; // no bits below the integer bit: returned as is, no flags raised
+            float_raise(status, float_flag_denormal);
+        }
+        float_raise(status, float_flag_inexact);
+        aSign = extractFloatx80Sign(a);
+        switch (roundingMode) {
+         case float_round_nearest_even:
+            if ((aExp == 0x3FFE) && (uint64_t) (aSig<<1)) { // exponent 0x3FFE with fraction bits set: rounds to +/-1
+                set_float_rounding_up(status);
+                return packFloatx80(aSign, 0x3FFF, UINT64_C(0x8000000000000000));
+            }
+            break; // otherwise leaves the switch for the signed zero below
+         case float_round_down:
+            if (aSign) { // negative: rounds down to -1
+                set_float_rounding_up(status);
+                return packFloatx80(1, 0x3FFF, UINT64_C(0x8000000000000000));
+            }
+            else { // positive: rounds down to +0
+                return packFloatx80(0, 0, 0);
+            }
+         case float_round_up:
+            if (aSign) { // negative: rounds up to -0
+                return packFloatx80(1, 0, 0);
+            }
+            else { // positive: rounds up to +1
+                set_float_rounding_up(status);
+                return packFloatx80(0, 0x3FFF, UINT64_C(0x8000000000000000));
+            }
+        }
+        return packFloatx80(aSign, 0, 0); // reached for any other mode, and for nearest-even when not rounded to +/-1
+    }
+    lastBitMask = 1;
+    lastBitMask <<= 0x403E - aExp; // mask of the lowest significand bit kept in the result
+    roundBitsMask = lastBitMask - 1; // mask of all bits below it, which are cleared further down
+    z = a;
+    if (roundingMode == float_round_nearest_even) {
+        z.fraction += lastBitMask>>1; // add half of the last kept bit
+        if ((z.fraction & roundBitsMask) == 0) z.fraction &= ~lastBitMask; // exact tie: clear the last kept bit (even)
+    }
+    else if (roundingMode != float_round_to_zero) {
+        if (extractFloatx80Sign(z) ^ (roundingMode == float_round_up)) // true for positive/up and for negative/not-up
+            z.fraction += roundBitsMask;
+    }
+    z.fraction &= ~roundBitsMask;
+    if (z.fraction == 0) { // presumably the addition carried out of bit 63: bump the exponent, restore the top bit
+        z.exp++;
+        z.fraction = UINT64_C(0x8000000000000000);
+    }
+    if (z.fraction != a.fraction) {
+        float_raise(status, float_flag_inexact);
+        if (z.fraction > a.fraction || z.exp > a.exp) // the stored magnitude grew
+            set_float_rounding_up(status);
+    }
+    return z;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the extended double-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the sum is
+| negated before being returned.  `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static floatx80 addFloatx80Sigs(floatx80 a, floatx80 b, int zSign, float_status_t &status)
+{ // Adds the magnitudes of `a' and `b'; zSign is the sign given to the packed result.
+    int32_t aExp, bExp, zExp;
+    uint64_t aSig, bSig, zSig0, zSig1;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
+    {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    bSig = extractFloatx80Frac(b);
+    bExp = extractFloatx80Exp(b);
+
+    if (aExp == 0x7FFF) { // a is an infinity or a NaN
+        if ((uint64_t) (aSig<<1) || ((bExp == 0x7FFF) && (uint64_t) (bSig<<1)))
+            return propagateFloatx80NaN(a, b, status);
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal); // a denormal b is still reported
+        return a; // infinite a is returned as is
+    }
+    if (bExp == 0x7FFF) { // b is an infinity or a NaN, a is finite
+        if ((uint64_t) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloatx80(zSign, 0x7FFF, UINT64_C(0x8000000000000000));
+    }
+    if (aExp == 0) { // a is zero or denormal
+        if (aSig == 0) { // a is zero: the result is b (normalized if denormal) with sign zSign
+            if ((bExp == 0) && bSig) {
+                float_raise(status, float_flag_denormal);
+                normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+            }
+            return roundAndPackFloatx80(get_float_rounding_precision(status),
+                    zSign, bExp, bSig, 0, status);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+    if (bExp == 0) { // b is zero or denormal
+        if (bSig == 0) // b is zero: the result is a, rounded to the current precision
+            return roundAndPackFloatx80(get_float_rounding_precision(status),
+                    zSign, aExp, aSig, 0, status);
+
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+    }
+    int32_t expDiff = aExp - bExp;
+    zExp = aExp;
+    if (0 < expDiff) { // a has the larger exponent: shift b right to align, spill goes to zSig1
+        shift64ExtraRightJamming(bSig, 0,  expDiff, &bSig, &zSig1);
+    }
+    else if (expDiff < 0) { // b has the larger exponent: shift a right instead and adopt bExp
+        shift64ExtraRightJamming(aSig, 0, -expDiff, &aSig, &zSig1);
+        zExp = bExp;
+    }
+    else { // equal exponents: no alignment, no spilled bits
+        zSig0 = aSig + bSig;
+        zSig1 = 0;
+        goto shiftRight1; // skips the top-bit test below and always renormalizes by one bit
+    }
+    zSig0 = aSig + bSig;
+    if ((int64_t) zSig0 < 0) goto roundAndPack; // top bit of the sum is set: no renormalizing shift
+ shiftRight1:
+    shift64ExtraRightJamming(zSig0, zSig1, 1, &zSig0, &zSig1);
+    zSig0 |= UINT64_C(0x8000000000000000); // force the top bit after the one-bit right shift
+    zExp++;
+ roundAndPack:
+    return
+        roundAndPackFloatx80(get_float_rounding_precision(status),
+            zSign, zExp, zSig0, zSig1, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the extended
+| double-precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static floatx80 subFloatx80Sigs(floatx80 a, floatx80 b, int zSign, float_status_t &status)
+{ // Subtracts the magnitude of `b' from that of `a'; zSign is inverted when b is the larger one.
+    int32_t aExp, bExp, zExp;
+    uint64_t aSig, bSig, zSig0, zSig1;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
+    {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    bSig = extractFloatx80Frac(b);
+    bExp = extractFloatx80Exp(b);
+
+    if (aExp == 0x7FFF) { // a is an infinity or a NaN
+        if ((uint64_t) (aSig<<1)) return propagateFloatx80NaN(a, b, status);
+        if (bExp == 0x7FFF) {
+            if ((uint64_t) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
+            float_raise(status, float_flag_invalid); // infinity minus infinity
+            return floatx80_default_nan;
+        }
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal); // a denormal b is still reported
+        return a;
+    }
+    if (bExp == 0x7FFF) { // b is an infinity or a NaN, a is finite
+        if ((uint64_t) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloatx80(zSign ^ 1, 0x7FFF, UINT64_C(0x8000000000000000)); // infinity with the sign inverted
+    }
+    if (aExp == 0) { // a is zero or denormal
+        if (aSig == 0) {
+            if (bExp == 0) {
+                if (bSig) { // zero minus denormal: b normalized, sign inverted
+                    float_raise(status, float_flag_denormal);
+                    normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+                    return roundAndPackFloatx80(get_float_rounding_precision(status),
+                        zSign ^ 1, bExp, bSig, 0, status);
+                }
+                return packFloatx80(get_float_rounding_mode(status) == float_round_down, 0, 0); // both zero: sign bit set only when rounding down
+            }
+            return roundAndPackFloatx80(get_float_rounding_precision(status),
+                    zSign ^ 1, bExp, bSig, 0, status);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+    if (bExp == 0) { // b is zero or denormal
+        if (bSig == 0) // b is zero: the result is a, rounded to the current precision
+            return roundAndPackFloatx80(get_float_rounding_precision(status),
+                    zSign, aExp, aSig, 0, status);
+
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+    }
+    int32_t expDiff = aExp - bExp;
+    if (0 < expDiff) { // a has the larger exponent: align b, spilled bits go to zSig1
+        shift128RightJamming(bSig, 0, expDiff, &bSig, &zSig1);
+        goto aBigger;
+    }
+    if (expDiff < 0) { // b has the larger exponent: align a instead
+        shift128RightJamming(aSig, 0, -expDiff, &aSig, &zSig1);
+        goto bBigger;
+    }
+    zSig1 = 0;
+    if (bSig < aSig) goto aBigger;
+    if (aSig < bSig) goto bBigger;
+    return packFloatx80(get_float_rounding_mode(status) == float_round_down, 0, 0); // equal magnitudes: zero, sign bit set only when rounding down
+ bBigger:
+    sub128(bSig, 0, aSig, zSig1, &zSig0, &zSig1); // 128-bit b - a
+    zExp = bExp;
+    zSign ^= 1; // b dominated, so the result sign is inverted
+    goto normalizeRoundAndPack;
+ aBigger:
+    sub128(aSig, 0, bSig, zSig1, &zSig0, &zSig1); // 128-bit a - b
+    zExp = aExp;
+ normalizeRoundAndPack:
+    return
+        normalizeRoundAndPackFloatx80(get_float_rounding_precision(status),
+            zSign, zExp, zSig0, zSig1, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the extended double-precision floating-point
+| values `a' and `b'.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_add(floatx80 a, floatx80 b, float_status_t &status)
+{
+    const int signA = extractFloatx80Sign(a);
+    const int signB = extractFloatx80Sign(b);
+
+    // Opposite signs turn the addition into a magnitude subtraction;
+    // in both cases the helper is handed the sign of `a'.
+    return (signA != signB) ? subFloatx80Sigs(a, b, signA, status)
+                            : addFloatx80Sigs(a, b, signA, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the extended double-precision floating-
+| point values `a' and `b'.  The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_sub(floatx80 a, floatx80 b, float_status_t &status)
+{
+    const int signA = extractFloatx80Sign(a);
+    const int signB = extractFloatx80Sign(b);
+
+    // Opposite signs turn the subtraction into a magnitude addition;
+    // in both cases the helper is handed the sign of `a'.
+    return (signA != signB) ? addFloatx80Sigs(a, b, signA, status)
+                            : subFloatx80Sigs(a, b, signA, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the extended double-precision floating-
+| point values `a' and `b'.  The operation is performed according to the
+| IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_mul(floatx80 a, floatx80 b, float_status_t &status)
+{ // Multiplies `a' by `b'; special operands are handled first, then the 64x64-bit significand product is rounded.
+    int aSign, bSign, zSign;
+    int32_t aExp, bExp, zExp;
+    uint64_t aSig, bSig, zSig0, zSig1;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
+    {
+ invalid: // also the target of the infinity-times-zero gotos below
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    aSign = extractFloatx80Sign(a);
+    bSig = extractFloatx80Frac(b);
+    bExp = extractFloatx80Exp(b);
+    bSign = extractFloatx80Sign(b);
+    zSign = aSign ^ bSign; // sign of the product
+
+    if (aExp == 0x7FFF) { // a is an infinity or a NaN
+        if ((uint64_t) (aSig<<1) || ((bExp == 0x7FFF) && (uint64_t) (bSig<<1))) {
+            return propagateFloatx80NaN(a, b, status);
+        }
+        if (bExp == 0) {
+            if (bSig == 0) goto invalid; // infinity times zero
+            float_raise(status, float_flag_denormal);
+        }
+        return packFloatx80(zSign, 0x7FFF, UINT64_C(0x8000000000000000));
+    }
+    if (bExp == 0x7FFF) { // b is an infinity or a NaN, a is finite
+        if ((uint64_t) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
+        if (aExp == 0) {
+            if (aSig == 0) goto invalid; // zero times infinity
+            float_raise(status, float_flag_denormal);
+        }
+        return packFloatx80(zSign, 0x7FFF, UINT64_C(0x8000000000000000));
+    }
+    if (aExp == 0) { // a is zero or denormal
+        if (aSig == 0) {
+            if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal); // zero times denormal still reports the denormal
+            return packFloatx80(zSign, 0, 0);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+    if (bExp == 0) { // b is zero or denormal
+        if (bSig == 0) return packFloatx80(zSign, 0, 0);
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+    }
+    zExp = aExp + bExp - 0x3FFE;
+    mul64To128(aSig, bSig, &zSig0, &zSig1); // 128-bit product returned in zSig0:zSig1
+    if (0 < (int64_t) zSig0) { // top bit of the product is clear: shift left one bit and lower the exponent
+        shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
+        --zExp;
+    }
+    return
+        roundAndPackFloatx80(get_float_rounding_precision(status),
+             zSign, zExp, zSig0, zSig1, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the extended double-precision floating-point
+| value `a' by the corresponding value `b'.  The operation is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_div(floatx80 a, floatx80 b, float_status_t &status)
+{ // Divides `a' by `b'; special operands are handled first, then the quotient is built from two corrected 64-bit estimates.
+    int aSign, bSign, zSign;
+    int32_t aExp, bExp, zExp;
+    uint64_t aSig, bSig, zSig0, zSig1;
+    uint64_t rem0, rem1, rem2, term0, term1, term2;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
+    {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    aSign = extractFloatx80Sign(a);
+    bSig = extractFloatx80Frac(b);
+    bExp = extractFloatx80Exp(b);
+    bSign = extractFloatx80Sign(b);
+
+    zSign = aSign ^ bSign; // sign of the quotient
+    if (aExp == 0x7FFF) { // a is an infinity or a NaN
+        if ((uint64_t) (aSig<<1)) return propagateFloatx80NaN(a, b, status);
+        if (bExp == 0x7FFF) {
+            if ((uint64_t) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
+            float_raise(status, float_flag_invalid); // infinity divided by infinity
+            return floatx80_default_nan;
+        }
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloatx80(zSign, 0x7FFF, UINT64_C(0x8000000000000000)); // infinity divided by a finite value
+    }
+    if (bExp == 0x7FFF) { // b is an infinity or a NaN, a is finite
+        if ((uint64_t) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+        return packFloatx80(zSign, 0, 0); // finite divided by infinity: signed zero
+    }
+    if (bExp == 0) { // b is zero or denormal
+        if (bSig == 0) {
+            if ((aExp | aSig) == 0) { // zero divided by zero
+                float_raise(status, float_flag_invalid);
+                return floatx80_default_nan;
+            }
+            float_raise(status, float_flag_divbyzero); // non-zero divided by zero: signed infinity
+            return packFloatx80(zSign, 0x7FFF, UINT64_C(0x8000000000000000));
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+    }
+    if (aExp == 0) { // a is zero or denormal
+        if (aSig == 0) return packFloatx80(zSign, 0, 0);
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+    zExp = aExp - bExp + 0x3FFE;
+    rem1 = 0;
+    if (bSig <= aSig) { // dividend significand not below the divisor: halve it into aSig:rem1 and bump zExp
+        shift128Right(aSig, 0, 1, &aSig, &rem1);
+        ++zExp;
+    }
+    zSig0 = estimateDiv128To64(aSig, rem1, bSig); // estimate of the upper 64 quotient bits
+    mul64To128(bSig, zSig0, &term0, &term1);
+    sub128(aSig, rem1, term0, term1, &rem0, &rem1); // remainder left by that estimate
+    while ((int64_t) rem0 < 0) { // negative remainder: the estimate was too high, step it down
+        --zSig0;
+        add128(rem0, rem1, 0, bSig, &rem0, &rem1);
+    }
+    zSig1 = estimateDiv128To64(rem1, 0, bSig); // estimate of the next 64 quotient bits
+    if ((uint64_t) (zSig1<<1) <= 8) { // refined only when zSig1, ignoring its top bit, is at most 4
+        mul64To128(bSig, zSig1, &term1, &term2);
+        sub128(rem1, 0, term1, term2, &rem1, &rem2);
+        while ((int64_t) rem1 < 0) {
+            --zSig1;
+            add128(rem1, rem2, 0, bSig, &rem1, &rem2);
+        }
+        zSig1 |= ((rem1 | rem2) != 0); // fold "remainder is non-zero" into the lowest bit
+    }
+    return
+        roundAndPackFloatx80(get_float_rounding_precision(status),
+            zSign, zExp, zSig0, zSig1, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the square root of the extended double-precision floating-point
+| value `a'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_sqrt(floatx80 a, float_status_t &status)
+{
+    int aSign;
+    int32_t aExp, zExp;
+    uint64_t aSig0, aSig1, zSig0, zSig1, doubleZSig0;
+    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+
+    // Unsupported x80 encodings raise invalid and yield the default NaN.
+    if (floatx80_is_unsupported(a))
+    {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    aSig0 = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    aSign = extractFloatx80Sign(a);
+    if (aExp == 0x7FFF) {  // maximum exponent: NaN or infinity
+        if ((uint64_t) (aSig0<<1)) return propagateFloatx80NaN(a, status);
+        if (! aSign) return a;  // +infinity is returned unchanged
+        float_raise(status, float_flag_invalid);  // -infinity is invalid
+        return floatx80_default_nan;
+    }
+    if (aSign) {  // negative operand with a non-maximum exponent
+        if ((aExp | aSig0) == 0) return a;  // -0 is returned unchanged
+        float_raise(status, float_flag_invalid);  // any other negative value
+        return floatx80_default_nan;
+    }
+    if (aExp == 0) {
+        if (aSig0 == 0) return packFloatx80(0, 0, 0);  // +0
+        float_raise(status, float_flag_denormal);  // denormal: flag it ...
+        normalizeFloatx80Subnormal(aSig0, &aExp, &aSig0);  // ... and normalize
+    }
+    zExp = ((aExp - 0x3FFF)>>1) + 0x3FFF;  // halve the unbiased exponent
+    zSig0 = estimateSqrt32(aExp, aSig0>>32);  // first estimate, top 32 bits
+    shift128Right(aSig0, 0, 2 + (aExp & 1), &aSig0, &aSig1);  // by 3 if odd
+    zSig0 = estimateDiv128To64(aSig0, aSig1, zSig0<<32) + (zSig0<<30);
+    doubleZSig0 = zSig0<<1;
+    mul64To128(zSig0, zSig0, &term0, &term1);
+    sub128(aSig0, aSig1, term0, term1, &rem0, &rem1);  // a - zSig0 * zSig0
+    while ((int64_t) rem0 < 0) {  // estimate too high: step it down
+        --zSig0;
+        doubleZSig0 -= 2;
+        add128(rem0, rem1, zSig0>>63, doubleZSig0 | 1, &rem0, &rem1);
+    }
+    zSig1 = estimateDiv128To64(rem1, 0, doubleZSig0);  // low result word
+    if ((zSig1 & UINT64_C(0x3FFFFFFFFFFFFFFF)) <= 5) {  // low 62 bits small
+        if (zSig1 == 0) zSig1 = 1;
+        mul64To128(doubleZSig0, zSig1, &term1, &term2);
+        sub128(rem1, 0, term1, term2, &rem1, &rem2);
+        mul64To128(zSig1, zSig1, &term2, &term3);
+        sub192(rem1, rem2, 0, 0, term2, term3, &rem1, &rem2, &rem3);
+        while ((int64_t) rem1 < 0) {  // same downward correction for zSig1
+            --zSig1;
+            shortShift128Left(0, zSig1, 1, &term2, &term3);
+            term3 |= 1;
+            term2 |= doubleZSig0;
+            add192(rem1, rem2, rem3, 0, term2, term3, &rem1, &rem2, &rem3);
+        }
+        zSig1 |= ((rem1 | rem2 | rem3) != 0);  // non-zero remainder: sticky bit
+    }
+    shortShift128Left(0, zSig1, 1, &zSig0, &zSig1);
+    zSig0 |= doubleZSig0;
+    return
+        roundAndPackFloatx80(get_float_rounding_precision(status),
+            0, zExp, zSig0, zSig1, status);
+}
+
+#endif
+
+#ifdef FLOAT128
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the quadruple-precision floating-point format. The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 floatx80_to_float128(floatx80 a, float_status_t &status)
+{
+    const int sign = extractFloatx80Sign(a);
+    const int32_t exponent = extractFloatx80Exp(a);
+    // Shift out the explicit integer bit; only the fraction is carried over.
+    const uint64_t fraction = (uint64_t) extractFloatx80Frac(a) << 1;
+
+    // Maximum exponent with a non-zero fraction: `a' is a NaN.
+    if (exponent == 0x7FFF && fraction != 0)
+        return commonNaNToFloat128(floatx80ToCommonNaN(a, status));
+    uint64_t zHi, zLo;
+    shift128Right(fraction, 0, 16, &zHi, &zLo);
+    return packFloat128(sign, exponent, zHi, zLo);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the quadruple-precision floating-point
+| value `a' to the extended double-precision floating-point format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 float128_to_floatx80(float128 a, float_status_t &status)
+{
+    const int sign = extractFloat128Sign(a);
+    int32_t exponent = extractFloat128Exp(a);
+    uint64_t sigHi = extractFloat128Frac0(a);
+    uint64_t sigLo = extractFloat128Frac1(a);
+    const bool fractionIsZero = ((sigHi | sigLo) == 0);
+
+    if (exponent == 0x7FFF) {
+        // Infinity maps to infinity; any other value here is a NaN.
+        if (fractionIsZero)
+            return packFloatx80(sign, 0x7FFF, UINT64_C(0x8000000000000000));
+        return commonNaNToFloatx80(float128ToCommonNaN(a, status));
+    }
+    if (exponent != 0) {
+        // Normal number: make the implicit integer bit explicit.
+        sigHi |= UINT64_C(0x0001000000000000);
+    } else if (fractionIsZero) {
+        return packFloatx80(sign, 0, 0);
+    } else {
+        float_raise(status, float_flag_denormal);
+        normalizeFloat128Subnormal(sigHi, sigLo, &exponent, &sigHi, &sigLo);
+    }
+
+    shortShift128Left(sigHi, sigLo, 15, &sigHi, &sigLo);
+    return roundAndPackFloatx80(80, sign, exponent, sigHi, sigLo, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the extended double-precision floating-
+| point value `a' and quadruple-precision floating point value `b'. The
+| operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_mul(floatx80 a, float128 b, float_status_t &status)
+{
+    int32_t aExp, bExp, zExp;
+    uint64_t aSig, bSig0, bSig1, zSig0, zSig1, zSig2;
+    int aSign, bSign, zSign;
+
+    // Unsupported x80 encodings of `a' raise invalid and yield the default
+    // NaN; the `invalid' label is also the target for infinity times zero.
+    if (floatx80_is_unsupported(a)) {
+ invalid:
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    aSign = extractFloatx80Sign(a);
+    bSig0 = extractFloat128Frac0(b);
+    bSig1 = extractFloat128Frac1(b);
+    bExp = extractFloat128Exp(b);
+    bSign = extractFloat128Sign(b);
+
+    zSign = aSign ^ bSign;
+
+    if ((bExp == 0x7FFF) && (bSig0 | bSig1)) {
+        // `b' is a NaN: convert it to x80 and apply the two-operand rule,
+        // whether or not `a' is a NaN as well.
+        floatx80 r = commonNaNToFloatx80(float128ToCommonNaN(b, status));
+        return propagateFloatx80NaN(a, r, status);
+    }
+    if (aExp == 0x7FFF) {
+        // `b' is not a NaN here, so it must not be pushed through
+        // float128ToCommonNaN(): a NaN `a' is propagated on its own.
+        if ((uint64_t) (aSig<<1)) return propagateFloatx80NaN(a, status);
+        if (bExp == 0) {
+            if ((bSig0 | bSig1) == 0) goto invalid;  // infinity times zero
+            float_raise(status, float_flag_denormal);
+        }
+        return packFloatx80(zSign, 0x7FFF, UINT64_C(0x8000000000000000));
+    }
+    if (bExp == 0x7FFF) {  // `b' is an infinity, `a' is not infinity or NaN
+        if (aExp == 0) {
+            if (aSig == 0) goto invalid;  // zero times infinity
+            float_raise(status, float_flag_denormal);
+        }
+        return packFloatx80(zSign, 0x7FFF, UINT64_C(0x8000000000000000));
+    }
+    if (aExp == 0) {
+        if (aSig == 0) {
+            if ((bExp == 0) && (bSig0 | bSig1)) float_raise(status, float_flag_denormal);
+            return packFloatx80(zSign, 0, 0);
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+    }
+    if (bExp == 0) {
+        if ((bSig0 | bSig1) == 0) return packFloatx80(zSign, 0, 0);
+        float_raise(status, float_flag_denormal);
+        normalizeFloat128Subnormal(bSig0, bSig1, &bExp, &bSig0, &bSig1);
+    }
+    else bSig0 |= UINT64_C(0x0001000000000000);  // integer bit of `b'
+
+    zExp = aExp + bExp - 0x3FFE;
+    shortShift128Left(bSig0, bSig1, 15, &bSig0, &bSig1);
+    // NOTE(review): zSig2 is never used after this call -- confirm intended.
+    mul128By64To192(bSig0, bSig1, aSig, &zSig0, &zSig1, &zSig2);
+    if (0 < (int64_t) zSig0) {  // top bit clear: shift up by one position
+        shortShift128Left(zSig0, zSig1, 1, &zSig0, &zSig1);
+        --zExp;
+    }
+    return
+        roundAndPackFloatx80(get_float_rounding_precision(status),
+             zSign, zExp, zSig0, zSig1, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the absolute values of the quadruple-precision
+| floating-point values `a' and `b'. If `zSign' is 1, the sum is negated
+| before being returned. `zSign' is ignored if the result is a NaN.
+| The addition is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128 addFloat128Sigs(float128 a, float128 b, int zSign, float_status_t &status)
+{
+    int32_t aExp, bExp, zExp;
+    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+    int32_t expDiff;
+    // NOTE(review): no float_flag_denormal is raised anywhere in this helper -- confirm intended.
+    aSig1 = extractFloat128Frac1(a);
+    aSig0 = extractFloat128Frac0(a);
+    aExp = extractFloat128Exp(a);
+    bSig1 = extractFloat128Frac1(b);
+    bSig0 = extractFloat128Frac0(b);
+    bExp = extractFloat128Exp(b);
+    expDiff = aExp - bExp;
+
+    if (0 < expDiff) {  // `a' has the larger exponent
+        if (aExp == 0x7FFF) {
+            if (aSig0 | aSig1) return propagateFloat128NaN(a, b, status);
+            return a;  // `a' is an infinity and is returned as is
+        }
+        if (bExp == 0) --expDiff;  // no integer bit to set; shift one less
+        else bSig0 |= UINT64_C(0x0001000000000000);  // integer bit of `b'
+        shift128ExtraRightJamming(bSig0, bSig1, 0, expDiff, &bSig0, &bSig1, &zSig2);
+        zExp = aExp;
+    }
+    else if (expDiff < 0) {  // `b' has the larger exponent
+        if (bExp == 0x7FFF) {
+            if (bSig0 | bSig1) return propagateFloat128NaN(a, b, status);
+            return packFloat128(zSign, 0x7FFF, 0, 0);  // infinity carrying zSign
+        }
+        if (aExp == 0) ++expDiff;  // no integer bit to set; shift one less
+        else aSig0 |= UINT64_C(0x0001000000000000);  // integer bit of `a'
+        shift128ExtraRightJamming(aSig0, aSig1, 0, -expDiff, &aSig0, &aSig1, &zSig2);
+        zExp = bExp;
+    }
+    else {  // equal exponents
+        if (aExp == 0x7FFF) {
+            if (aSig0 | aSig1 | bSig0 | bSig1)
+                return propagateFloat128NaN(a, b, status);
+
+            return a;  // both operands are infinities
+        }
+        add128(aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1);
+        if (aExp == 0) return packFloat128(zSign, 0, zSig0, zSig1);  // both exponents zero: packed without rounding
+        zSig2 = 0;
+        zSig0 |= UINT64_C(0x0002000000000000);  // sum of the two integer bits
+        zExp = aExp;
+        goto shiftRight1;
+    }
+    aSig0 |= UINT64_C(0x0001000000000000);  // integer bit of the larger-exponent operand
+    add128(aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1);
+    --zExp;
+    if (zSig0 < UINT64_C(0x0002000000000000)) goto roundAndPack;  // no carry out
+    ++zExp;
+ shiftRight1:
+    shift128ExtraRightJamming(zSig0, zSig1, zSig2, 1, &zSig0, &zSig1, &zSig2);
+ roundAndPack:
+    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the absolute values of the quadruple-
+| precision floating-point values `a' and `b'.  If `zSign' is 1, the
+| difference is negated before being returned.  `zSign' is ignored if the
+| result is a NaN.  The subtraction is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+static float128 subFloat128Sigs(float128 a, float128 b, int zSign, float_status_t &status)
+{
+    int32_t aExp, bExp, zExp;
+    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1;
+    int32_t expDiff;
+
+    aSig1 = extractFloat128Frac1(a);
+    aSig0 = extractFloat128Frac0(a);
+    aExp = extractFloat128Exp(a);
+    bSig1 = extractFloat128Frac1(b);
+    bSig0 = extractFloat128Frac0(b);
+    bExp = extractFloat128Exp(b);
+
+    expDiff = aExp - bExp;
+    shortShift128Left(aSig0, aSig1, 14, &aSig0, &aSig1);  // scale up by 14 bits ...
+    shortShift128Left(bSig0, bSig1, 14, &bSig0, &bSig1);  // ... undone via zExp - 14 below
+    if (0 < expDiff) goto aExpBigger;
+    if (expDiff < 0) goto bExpBigger;
+    if (aExp == 0x7FFF) {  // equal, maximum exponents
+        if (aSig0 | aSig1 | bSig0 | bSig1)
+            return propagateFloat128NaN(a, b, status);
+
+        float_raise(status, float_flag_invalid);  // infinity minus infinity
+        return float128_default_nan;
+    }
+    if (aExp == 0) {  // both exponents are zero: continue with exponent 1
+        aExp = 1;
+        bExp = 1;
+    }
+    if (bSig0 < aSig0) goto aBigger;  // equal exponents: compare significands
+    if (aSig0 < bSig0) goto bBigger;
+    if (bSig1 < aSig1) goto aBigger;
+    if (aSig1 < bSig1) goto bBigger;
+    return packFloat128(0, 0);  // NOTE(review): equal magnitudes; presumably +0 from (hi, lo) words, whereas IEEE 754 gives -0 when rounding toward -infinity -- confirm intended
+
+ bExpBigger:
+    if (bExp == 0x7FFF) {
+        if (bSig0 | bSig1) return propagateFloat128NaN(a, b, status);
+        return packFloat128(zSign ^ 1, 0x7FFF, 0, 0);  // infinity with the sign flipped
+    }
+    if (aExp == 0) ++expDiff;  // no integer bit to set; shift one less
+    else {
+        aSig0 |= UINT64_C(0x4000000000000000);  // integer bit of `a' (bit 48 + 14)
+    }
+    shift128RightJamming(aSig0, aSig1, - expDiff, &aSig0, &aSig1);
+    bSig0 |= UINT64_C(0x4000000000000000);  // integer bit of `b'
+ bBigger:
+    sub128(bSig0, bSig1, aSig0, aSig1, &zSig0, &zSig1);  // |b| - |a|
+    zExp = bExp;
+    zSign ^= 1;  // `b' dominates, so the result sign is flipped
+    goto normalizeRoundAndPack;
+ aExpBigger:
+    if (aExp == 0x7FFF) {
+        if (aSig0 | aSig1) return propagateFloat128NaN(a, b, status);
+        return a;  // `a' is an infinity and is returned as is
+    }
+    if (bExp == 0) --expDiff;  // no integer bit to set; shift one less
+    else {
+        bSig0 |= UINT64_C(0x4000000000000000);  // integer bit of `b' (bit 48 + 14)
+    }
+    shift128RightJamming(bSig0, bSig1, expDiff, &bSig0, &bSig1);
+    aSig0 |= UINT64_C(0x4000000000000000);  // integer bit of `a'
+ aBigger:
+    sub128(aSig0, aSig1, bSig0, bSig1, &zSig0, &zSig1);  // |a| - |b|
+    zExp = aExp;
+ normalizeRoundAndPack:
+    --zExp;
+    return normalizeRoundAndPackFloat128(zSign, zExp - 14, zSig0, zSig1, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of adding the quadruple-precision floating-point values
+| `a' and `b'.  The operation is performed according to the IEC/IEEE Standard
+| for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_add(float128 a, float128 b, float_status_t &status)
+{
+    // Both helpers work on magnitudes and are handed the sign of `a'.
+    const int signOfA = extractFloat128Sign(a);
+
+    // Opposite signs turn the addition into a magnitude subtraction.
+    if (signOfA != extractFloat128Sign(b))
+        return subFloat128Sigs(a, b, signOfA, status);
+
+    // Equal signs: add the magnitudes.
+    return addFloat128Sigs(a, b, signOfA, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of subtracting the quadruple-precision floating-point
+| values `a' and `b'.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_sub(float128 a, float128 b, float_status_t &status)
+{
+    // Both helpers work on magnitudes and are handed the sign of `a'.
+    const int signOfA = extractFloat128Sign(a);
+
+    // Opposite signs turn the subtraction into a magnitude addition.
+    if (signOfA != extractFloat128Sign(b))
+        return addFloat128Sigs(a, b, signOfA, status);
+
+    // Equal signs: subtract the magnitudes.
+    return subFloat128Sigs(a, b, signOfA, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of multiplying the quadruple-precision floating-point
+| values `a' and `b'.  The operation is performed according to the IEC/IEEE
+| Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_mul(float128 a, float128 b, float_status_t &status)
+{
+    uint64_t aHi = extractFloat128Frac0(a);
+    uint64_t aLo = extractFloat128Frac1(a);
+    int32_t aExp = extractFloat128Exp(a);
+    uint64_t bHi = extractFloat128Frac0(b);
+    uint64_t bLo = extractFloat128Frac1(b);
+    int32_t bExp = extractFloat128Exp(b);
+    // The product is negative exactly when the operand signs differ.
+    const int zSign = extractFloat128Sign(a) ^ extractFloat128Sign(b);
+
+    const bool aFracIsZero = ((aHi | aLo) == 0);
+    const bool bFracIsZero = ((bHi | bLo) == 0);
+    const bool aIsInfOrNaN = (aExp == 0x7FFF);
+    const bool bIsInfOrNaN = (bExp == 0x7FFF);
+
+    if (aIsInfOrNaN || bIsInfOrNaN) {
+        // A NaN operand takes precedence over everything else.
+        if ((aIsInfOrNaN && ! aFracIsZero) || (bIsInfOrNaN && ! bFracIsZero))
+            return propagateFloat128NaN(a, b, status);
+
+        // At least one operand is an infinity; infinity times zero is invalid.
+        const bool otherIsZero = aIsInfOrNaN ? (bExp == 0 && bFracIsZero)
+                                             : (aExp == 0 && aFracIsZero);
+        if (otherIsZero) {
+            float_raise(status, float_flag_invalid);
+            return float128_default_nan;
+        }
+        return packFloat128(zSign, 0x7FFF, 0, 0);
+    }
+
+    // Zero operands give a signed zero; denormals are flagged and normalized.
+    if (aExp == 0) {
+        if (aFracIsZero) return packFloat128(zSign, 0, 0, 0);
+        float_raise(status, float_flag_denormal);
+        normalizeFloat128Subnormal(aHi, aLo, &aExp, &aHi, &aLo);
+    }
+    if (bExp == 0) {
+        if (bFracIsZero) return packFloat128(zSign, 0, 0, 0);
+        float_raise(status, float_flag_denormal);
+        normalizeFloat128Subnormal(bHi, bLo, &bExp, &bHi, &bLo);
+    }
+    int32_t zExp = aExp + bExp - 0x4000;
+    aHi |= UINT64_C(0x0001000000000000);
+    shortShift128Left(bHi, bLo, 16, &bHi, &bLo);
+    uint64_t z0, z1, z2, z3;
+    mul128To256(aHi, aLo, bHi, bLo, &z0, &z1, &z2, &z3);
+    // The shifted `b' carries no integer bit, so `a' is added back once.
+    add128(z0, z1, aHi, aLo, &z0, &z1);
+    z2 |= (z3 != 0);
+    if (z0 >= UINT64_C(0x0002000000000000)) {
+        shift128ExtraRightJamming(z0, z1, z2, 1, &z0, &z1, &z2);
+        ++zExp;
+    }
+    return roundAndPackFloat128(zSign, zExp, z0, z1, z2, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of dividing the quadruple-precision floating-point value
+| `a' by the corresponding value `b'.  The operation is performed according to
+| the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 float128_div(float128 a, float128 b, float_status_t &status)
+{
+    int aSign, bSign, zSign;
+    int32_t aExp, bExp, zExp;
+    uint64_t aSig0, aSig1, bSig0, bSig1, zSig0, zSig1, zSig2;
+    uint64_t rem0, rem1, rem2, rem3, term0, term1, term2, term3;
+
+    aSig1 = extractFloat128Frac1(a);
+    aSig0 = extractFloat128Frac0(a);
+    aExp = extractFloat128Exp(a);
+    aSign = extractFloat128Sign(a);
+    bSig1 = extractFloat128Frac1(b);
+    bSig0 = extractFloat128Frac0(b);
+    bExp = extractFloat128Exp(b);
+    bSign = extractFloat128Sign(b);
+
+    zSign = aSign ^ bSign;  // quotient is negative when the signs differ
+    if (aExp == 0x7FFF) {  // `a' is a NaN or an infinity
+        if (aSig0 | aSig1) return propagateFloat128NaN(a, b, status);
+        if (bExp == 0x7FFF) {
+            if (bSig0 | bSig1) return propagateFloat128NaN(a, b, status);
+            float_raise(status, float_flag_invalid);  // infinity / infinity
+            return float128_default_nan;
+        }
+        return packFloat128(zSign, 0x7FFF, 0, 0);  // infinity / anything else
+    }
+    if (bExp == 0x7FFF) {  // `b' is a NaN or an infinity
+        if (bSig0 | bSig1) return propagateFloat128NaN(a, b, status);
+        return packFloat128(zSign, 0, 0, 0);  // `a' / infinity is a signed zero
+    }
+    if (bExp == 0) {
+        if ((bSig0 | bSig1) == 0) {  // division by zero
+            if ((aExp | aSig0 | aSig1) == 0) {
+                float_raise(status, float_flag_invalid);  // zero / zero
+                return float128_default_nan;
+            }
+            float_raise(status, float_flag_divbyzero);
+            return packFloat128(zSign, 0x7FFF, 0, 0);  // signed infinity
+        }
+        float_raise(status, float_flag_denormal);  // denormal divisor
+        normalizeFloat128Subnormal(bSig0, bSig1, &bExp, &bSig0, &bSig1);
+    }
+    if (aExp == 0) {
+        if ((aSig0 | aSig1) == 0) return packFloat128(zSign, 0, 0, 0);  // zero dividend
+        float_raise(status, float_flag_denormal);  // denormal dividend
+        normalizeFloat128Subnormal(aSig0, aSig1, &aExp, &aSig0, &aSig1);
+    }
+    zExp = aExp - bExp + 0x3FFD;
+    shortShift128Left(  // set the integer bit of `a' and move it to bit 63
+        aSig0 | UINT64_C(0x0001000000000000), aSig1, 15, &aSig0, &aSig1);
+    shortShift128Left(  // same for `b'
+        bSig0 | UINT64_C(0x0001000000000000), bSig1, 15, &bSig0, &bSig1);
+    if (le128(bSig0, bSig1, aSig0, aSig1)) {  // b <= a: halve `a', bump the exponent
+        shift128Right(aSig0, aSig1, 1, &aSig0, &aSig1);
+        ++zExp;
+    }
+    zSig0 = estimateDiv128To64(aSig0, aSig1, bSig0);  // high quotient word
+    mul128By64To192(bSig0, bSig1, zSig0, &term0, &term1, &term2);
+    sub192(aSig0, aSig1, 0, term0, term1, term2, &rem0, &rem1, &rem2);
+    while ((int64_t) rem0 < 0) {  // estimate too high: step it down
+        --zSig0;
+        add192(rem0, rem1, rem2, 0, bSig0, bSig1, &rem0, &rem1, &rem2);
+    }
+    zSig1 = estimateDiv128To64(rem1, rem2, bSig0);  // low quotient word
+    if ((zSig1 & 0x3FFF) <= 4) {  // low 14 bits small: compute the exact remainder
+        mul128By64To192(bSig0, bSig1, zSig1, &term1, &term2, &term3);
+        sub192(rem1, rem2, 0, term1, term2, term3, &rem1, &rem2, &rem3);
+        while ((int64_t) rem1 < 0) {  // same downward correction for zSig1
+            --zSig1;
+            add192(rem1, rem2, rem3, 0, bSig0, bSig1, &rem1, &rem2, &rem3);
+        }
+        zSig1 |= ((rem1 | rem2 | rem3) != 0);  // non-zero remainder: sticky bit
+    }
+    shift128ExtraRightJamming(zSig0, zSig1, 0, 15, &zSig0, &zSig1, &zSig2);
+    return roundAndPackFloat128(zSign, zExp, zSig0, zSig1, zSig2, status);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the 64-bit two's complement integer `a' to
+| the quadruple-precision floating-point format.  The conversion is performed
+| according to the IEC/IEEE Standard for Binary Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float128 int64_to_float128(int64_t a)
+{
+    if (a == 0) return packFloat128(0, 0, 0, 0);
+
+    const int zSign = (a < 0);
+    // Negate in unsigned arithmetic: `- a' on int64_t overflows (undefined
+    // behaviour) for INT64_MIN, while 0 - (uint64_t) a wraps to 2^63.
+    const uint64_t absA = zSign ? (0 - (uint64_t) a) : (uint64_t) a;
+    // Left shift that moves the leading 1 of absA to bit 112 of zSig0:zSig1.
+    uint8_t shiftCount = countLeadingZeros64(absA) + 49;
+    const int32_t zExp = 0x406E - shiftCount;
+    uint64_t zSig0 = 0, zSig1 = absA;
+    if (64 <= shiftCount) {
+        // Do the whole-word part of the shift by moving absA up one word.
+        zSig0 = absA;
+        zSig1 = 0;
+        shiftCount -= 64;
+    }
+    shortShift128Left(zSig0, zSig1, shiftCount, &zSig0, &zSig1);
+    return packFloat128(zSign, zExp, zSig0, zSig1);
+}
+
+END_SOFTFLOAT_NS
+
+#endif
diff -r 372d3611c693 -r e89a7cf22da3 ext/softfloat/softfloat16.cc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ext/softfloat/softfloat16.cc	Thu Sep 19 18:09:15 2013 +0200
@@ -0,0 +1,133 @@
+/*============================================================================
+This C source file is part of the SoftFloat IEC/IEEE Floating-point Arithmetic
+Package, Release 2b.
+
+Written by John R. Hauser.  This work was made possible in part by the
+International Computer Science Institute, located at Suite 600, 1947 Center
+Street, Berkeley, California 94704.  Funding was partially provided by the
+National Science Foundation under grant MIP-9311980.  The original version
+of this code was written as part of a project to build a fixed-point vector
+processor in collaboration with the University of California at Berkeley,
+overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
+is available through the Web page `http://www.cs.berkeley.edu/~jhauser/
+arithmetic/SoftFloat.html'.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Adapted for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#include "softfloat.hh"
+
+#ifdef FLOAT16
+
+#include "softfloat-round-pack.hh"
+#include "softfloat-specialize.hh"
+#include "softfloat-macros.hh"
+
+BEGIN_SOFTFLOAT_NS
+
+/*----------------------------------------------------------------------------
+| Determine half-precision floating-point number class
+*----------------------------------------------------------------------------*/
+
+float_class_t float16_class(float16 a)
+{
+    const uint16_t fraction = extractFloat16Frac(a);
+
+    switch (extractFloat16Exp(a)) {
+      case 0x1F:
+        // All-ones exponent: infinity when the fraction is clear, else NaN.
+        if (fraction != 0)
+            return float_NaN;
+        return extractFloat16Sign(a) ? float_negative_inf
+                                     : float_positive_inf;
+
+      case 0:
+        // Zero exponent: a true zero or a denormal.
+        return (fraction == 0) ? float_zero : float_denormal;
+
+      default:
+        return float_normalized;
+    }
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the half-precision floating-point value
+| `a' to the single-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float32 float16_to_float32(float16 a, float_status_t &status)
+{
+    const int sign = extractFloat16Sign(a);
+    int16_t exponent = extractFloat16Exp(a);
+    uint16_t fraction = extractFloat16Frac(a);
+
+    if (exponent == 0x1F) {
+        // Infinity keeps its sign; a NaN goes through the common NaN format.
+        if (fraction == 0) return packFloat32(sign, 0xFF, 0);
+        return commonNaNToFloat32(float16ToCommonNaN(a, status));
+    } else if (exponent == 0) {
+        if (fraction == 0) return packFloat32(sign, 0, 0);
+        // Denormals are converted regardless of the denormals_are_zeros flag.
+        float_raise(status, float_flag_denormal);
+        normalizeFloat16Subnormal(fraction, &exponent, &fraction);
+        --exponent;
+    }
+
+    return packFloat32(sign, exponent + 0x70, ((uint32_t) fraction) << 13);
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the single-precision floating-point value
+| `a' to the half-precision floating-point format.  The conversion is
+| performed according to the IEC/IEEE Standard for Binary Floating-Point
+| Arithmetic.
+*----------------------------------------------------------------------------*/
+
+float16 float32_to_float16(float32 a, float_status_t &status)
+{
+    const int sign = extractFloat32Sign(a);
+    int16_t exponent = extractFloat32Exp(a);
+    uint32_t fraction = extractFloat32Frac(a);
+
+    if (exponent == 0xFF) {
+        // Infinity keeps its sign; a NaN goes through the common NaN format.
+        if (fraction == 0) return packFloat16(sign, 0x1F, 0);
+        return commonNaNToFloat16(float32ToCommonNaN(a, status));
+    } else if (exponent == 0) {
+        // A zero, or a denormal when denormals_are_zeros is set, gives zero.
+        if (get_denormals_are_zeros(status) || fraction == 0)
+            return packFloat16(sign, 0, 0);
+        float_raise(status, float_flag_denormal);
+    }
+
+    // Drop nine fraction bits through the jamming shift, then narrow.
+    uint16_t zSig = (uint16_t) shift32RightJamming(fraction, 9);
+    if (exponent != 0 || zSig != 0) {
+        zSig |= 0x4000;
+        exponent -= 0x71;
+    }
+    return roundAndPackFloat16(sign, exponent, zSig, status);
+}
+
+END_SOFTFLOAT_NS
+
+#endif
diff -r 372d3611c693 -r e89a7cf22da3 ext/softfloat/softfloat_ns.hh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ext/softfloat/softfloat_ns.hh	Thu Sep 19 18:09:15 2013 +0200
@@ -0,0 +1,38 @@
+/*
+ * Copyright (c) 2013 Andreas Sandberg
+ * All rights reserved
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions are
+ * met: redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer;
+ * redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution;
+ * neither the name of the copyright holders nor the names of its
+ * contributors may be used to endorse or promote products derived from
+ * this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *
+ * Authors: Andreas Sandberg
+ */
+
+#ifndef _SOFTFLOAT_NS_HH_
+#define _SOFTFLOAT_NS_HH_
+
+#define USING_SOFTFLOAT_NS using namespace SoftFloat /* no ';' in the macro */
+#define BEGIN_SOFTFLOAT_NS namespace SoftFloat { /* opens namespace SoftFloat */
+#define END_SOFTFLOAT_NS } /* closing brace for BEGIN_SOFTFLOAT_NS */
+
+#endif
diff -r 372d3611c693 -r e89a7cf22da3 ext/softfloat/softfloatx80.hh
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ext/softfloat/softfloatx80.hh	Thu Sep 19 18:09:15 2013 +0200
@@ -0,0 +1,104 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#ifndef _SOFTFLOATX80_EXTENSIONS_H_
+#define _SOFTFLOATX80_EXTENSIONS_H_
+
+#include "softfloat.hh"
+#include "softfloat-specialize.hh"
+
+BEGIN_SOFTFLOAT_NS
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE floating-point-to-integer conversion routines.
+*----------------------------------------------------------------------------*/
+
+int16_t floatx80_to_int16(floatx80, float_status_t &status);
+int16_t floatx80_to_int16_round_to_zero(floatx80, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision operations.
+*----------------------------------------------------------------------------*/
+
+float_class_t floatx80_class(floatx80);
+floatx80 floatx80_extract(floatx80 &a, float_status_t &status);
+floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status_t &status);
+int floatx80_remainder(floatx80 a, floatx80 b, floatx80 &r, uint64_t &q, float_status_t &status);
+int floatx80_ieee754_remainder(floatx80 a, floatx80 b, floatx80 &r, uint64_t &q, float_status_t &status);
+floatx80 f2xm1(floatx80 a, float_status_t &status);
+floatx80 fyl2x(floatx80 a, floatx80 b, float_status_t &status);
+floatx80 fyl2xp1(floatx80 a, floatx80 b, float_status_t &status);
+floatx80 fpatan(floatx80 a, floatx80 b, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision trigonometric functions.
+*----------------------------------------------------------------------------*/
+
+int fsincos(floatx80 a, floatx80 *sin_a, floatx80 *cos_a, float_status_t &status);
+int fsin(floatx80 &a, float_status_t &status);
+int fcos(floatx80 &a, float_status_t &status);
+int ftan(floatx80 &a, float_status_t &status);
+
+/*----------------------------------------------------------------------------
+| Software IEC/IEEE extended double-precision compare.
+*----------------------------------------------------------------------------*/
+
+int floatx80_compare(floatx80, floatx80, float_status_t &status);
+int floatx80_compare_quiet(floatx80, floatx80, float_status_t &status);
+
+/*-----------------------------------------------------------------------------
+| Replaces the extended double-precision floating-point value `reg' with its
+| absolute value, in place, by masking `reg.exp' with 0x7FFF (which clears
+| bit 15, the sign bit), and returns a reference to the modified `reg'.
+*----------------------------------------------------------------------------*/
+
+inline floatx80& floatx80_abs(floatx80 &reg)
+{
+    reg.exp &= 0x7FFF; // drop bit 15, keep the 15 exponent bits
+    return reg;
+}
+
+/*-----------------------------------------------------------------------------
+| Changes the sign of the extended double-precision floating-point value `reg'
+| in place by toggling bit 15 (0x8000) of `reg.exp', and returns a reference
+| to the modified `reg'.
+*----------------------------------------------------------------------------*/
+
+inline floatx80& floatx80_chs(floatx80 &reg)
+{
+    reg.exp ^= 0x8000; // flip bit 15, leave the 15 exponent bits untouched
+    return reg;
+}
+
+/*-----------------------------------------------------------------------------
+| Commonly used extended double-precision floating-point constants.
+*----------------------------------------------------------------------------*/
+
+extern const floatx80 Const_Z;
+extern const floatx80 Const_1;
+
+END_SOFTFLOAT_NS
+
+#endif
diff -r 372d3611c693 -r e89a7cf22da3 ext/softfloat/softfloatx80.cc
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/ext/softfloat/softfloatx80.cc	Thu Sep 19 18:09:15 2013 +0200
@@ -0,0 +1,367 @@
+/*============================================================================
+This source file is an extension to the SoftFloat IEC/IEEE Floating-point
+Arithmetic Package, Release 2b, written for Bochs (x86 achitecture simulator)
+floating point emulation.
+
+THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort has
+been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT TIMES
+RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO PERSONS
+AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ALL LOSSES,
+COSTS, OR OTHER PROBLEMS THEY INCUR DUE TO THE SOFTWARE, AND WHO FURTHERMORE
+EFFECTIVELY INDEMNIFY JOHN HAUSER AND THE INTERNATIONAL COMPUTER SCIENCE
+INSTITUTE (possibly via similar legal warning) AGAINST ALL LOSSES, COSTS, OR
+OTHER PROBLEMS INCURRED BY THEIR CUSTOMERS AND CLIENTS DUE TO THE SOFTWARE.
+
+Derivative works are acceptable, even for commercial purposes, so long as
+(1) the source code for the derivative work includes prominent notice that
+the work is derivative, and (2) the source code includes prominent notice with
+these four paragraphs for those parts of this code that are retained.
+=============================================================================*/
+
+/*============================================================================
+ * Written for Bochs (x86 architecture simulator) by
+ *            Stanislav Shwartsman [sshwarts at sourceforge net]
+ * ==========================================================================*/
+
+#include "softfloatx80.hh"
+#include "softfloat-round-pack.hh"
+#include "softfloat-macros.hh"
+
+BEGIN_SOFTFLOAT_NS
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 16-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic - which means in particular that it is rounded
+| according to the current rounding mode. If `a' is a NaN or an unsupported
+| encoding, or the conversion overflows, the integer indefinite is returned.
+*----------------------------------------------------------------------------*/
+
+int16_t floatx80_to_int16(floatx80 a, float_status_t &status)
+{
+   if (floatx80_is_unsupported(a))
+   {
+        float_raise(status, float_flag_invalid);
+        return int16_indefinite;
+   }
+
+   int32_t v32 = floatx80_to_int32(a, status); // convert at 32 bits first, then narrow
+
+   if ((v32 > 32767) || (v32 < -32768)) { // 32-bit result does not fit in int16_t
+        status.float_exception_flags = float_flag_invalid; // throw away other flags
+        return int16_indefinite;
+   }
+
+   return (int16_t) v32;
+}
+
+/*----------------------------------------------------------------------------
+| Returns the result of converting the extended double-precision floating-
+| point value `a' to the 16-bit two's complement integer format.  The
+| conversion is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic, except that the conversion is always rounded
+| toward zero.  If `a' is a NaN or an unsupported encoding, or the conversion
+| overflows, the integer indefinite value is returned.
+*----------------------------------------------------------------------------*/
+
+int16_t floatx80_to_int16_round_to_zero(floatx80 a, float_status_t &status)
+{
+   if (floatx80_is_unsupported(a))
+   {
+        float_raise(status, float_flag_invalid);
+        return int16_indefinite;
+   }
+
+   int32_t v32 = floatx80_to_int32_round_to_zero(a, status); // convert at 32 bits first, then narrow
+
+   if ((v32 > 32767) || (v32 < -32768)) { // 32-bit result does not fit in int16_t
+        status.float_exception_flags = float_flag_invalid; // throw away other flags
+        return int16_indefinite;
+   }
+
+   return (int16_t) v32;
+}
+
+/*----------------------------------------------------------------------------
+| Separate the source extended double-precision floating point value `a'
+| into its exponent and significand, store the significand back to `a'
+| and return the exponent. The operation performed is a superset of
+| the IEC/IEEE recommended logb(x) function.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_extract(floatx80 &a, float_status_t &status)
+{
+    uint64_t aSig = extractFloatx80Frac(a);
+    int32_t aExp = extractFloatx80Exp(a);
+    int   aSign = extractFloatx80Sign(a);
+
+    if (floatx80_is_unsupported(a))
+    {
+        float_raise(status, float_flag_invalid);
+        a = floatx80_default_nan;
+        return a; // both `a' and the returned value are the default NaN
+    }
+
+    if (aExp == 0x7FFF) { // maximum exponent field
+        if ((uint64_t) (aSig<<1)) // bits set below the top significand bit
+        {
+            a = propagateFloatx80NaN(a, status);
+            return a;
+        }
+        return packFloatx80(0, 0x7FFF, UINT64_C(0x8000000000000000)); // `a' itself is left unchanged
+    }
+    if (aExp == 0)
+    {
+        if (aSig == 0) { // exponent and significand both zero
+            float_raise(status, float_flag_divbyzero);
+            a = packFloatx80(aSign, 0, 0);
+            return packFloatx80(1, 0x7FFF, UINT64_C(0x8000000000000000));
+        }
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig); // updates aExp and aSig in place
+    }
+
+    a.exp = (aSign << 15) + 0x3FFF; // keep the sign in bit 15, force the exponent field to 0x3FFF
+    a.fraction = aSig;
+    return int32_to_floatx80(aExp - 0x3FFF); // exponent field minus 0x3FFF, as a floatx80
+}
+
+/*----------------------------------------------------------------------------
+| Scales extended double-precision floating-point value in operand `a' by
+| value `b'. The function truncates the value in the second operand 'b' to
+| an integral value and adds that value to the exponent of the operand 'a'.
+| The operation is performed according to the IEC/IEEE Standard for Binary
+| Floating-Point Arithmetic.
+*----------------------------------------------------------------------------*/
+
+floatx80 floatx80_scale(floatx80 a, floatx80 b, float_status_t &status)
+{
+    int32_t aExp, bExp;
+    uint64_t aSig, bSig;
+
+    // handle unsupported extended double-precision floating encodings
+    if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
+    {
+        float_raise(status, float_flag_invalid);
+        return floatx80_default_nan;
+    }
+
+    aSig = extractFloatx80Frac(a);
+    aExp = extractFloatx80Exp(a);
+    int aSign = extractFloatx80Sign(a);
+    bSig = extractFloatx80Frac(b);
+    bExp = extractFloatx80Exp(b);
+    int bSign = extractFloatx80Sign(b);
+
+    if (aExp == 0x7FFF) { // `a' has the maximum exponent field
+        if ((uint64_t) (aSig<<1) || ((bExp == 0x7FFF) && (uint64_t) (bSig<<1)))
+        {
+            return propagateFloatx80NaN(a, b, status);
+        }
+        if ((bExp == 0x7FFF) && bSign) {
+            float_raise(status, float_flag_invalid);
+            return floatx80_default_nan;
+        }
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        return a;
+    }
+    if (bExp == 0x7FFF) { // `b' has the maximum exponent field, `a' does not
+        if ((uint64_t) (bSig<<1)) return propagateFloatx80NaN(a, b, status);
+        if ((aExp | aSig) == 0) { // exponent and significand of `a' are both zero
+            if (! bSign) {
+                float_raise(status, float_flag_invalid);
+                return floatx80_default_nan;
+            }
+            return a;
+        }
+        if (aSig && (aExp == 0)) float_raise(status, float_flag_denormal);
+        if (bSign) return packFloatx80(aSign, 0, 0);
+        return packFloatx80(aSign, 0x7FFF, UINT64_C(0x8000000000000000));
+    }
+    if (aExp == 0) { // zero exponent field in `a'
+        if (bSig && (bExp == 0)) float_raise(status, float_flag_denormal);
+        if (aSig == 0) return a;
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig); // updates aExp and aSig in place
+        if (bExp < 0x3FFF)
+            return normalizeRoundAndPackFloatx80(80, aSign, aExp, aSig, 0, status);
+    }
+    if (bExp == 0) { // zero exponent field in `b'
+        if (bSig == 0) return a;
+        float_raise(status, float_flag_denormal);
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig); // updates bExp and bSig in place
+    }
+
+    if (bExp > 0x400E) {
+        /* generate appropriate overflow/underflow */
+        return roundAndPackFloatx80(80, aSign,
+                          bSign ? -0x3FFF : 0x7FFF, aSig, 0, status);
+    }
+
+    if (bExp < 0x3FFF) return a; // exponent field of `b' below 0x3FFF: `a' is returned as is
+
+    int shiftCount = 0x403E - bExp; // 48..63, since 0x3FFF <= bExp <= 0x400E here
+    bSig >>= shiftCount; // at most 16 bits remain
+    int32_t scale = (int32_t) bSig;
+    if (bSign) scale = -scale; /* magnitude at most 0xFFFF (shift is >= 48) */
+    return
+        roundAndPackFloatx80(80, aSign, aExp+scale, aSig, 0, status);
+}
+
+/*----------------------------------------------------------------------------
+| Determine the class (zero, denormal, NaN, +/-infinity, normalized) of `a'.
+*----------------------------------------------------------------------------*/
+
+float_class_t floatx80_class(floatx80 a)
+{
+   int32_t aExp = extractFloatx80Exp(a);
+   uint64_t aSig = extractFloatx80Frac(a);
+
+   if(aExp == 0) { // the sign is not looked at for zeros and denormals
+       if (aSig == 0)
+           return float_zero;
+
+       /* denormal or pseudo-denormal */
+       return float_denormal;
+   }
+
+   /* valid numbers have the MS bit set */
+   if (!(aSig & UINT64_C(0x8000000000000000)))
+       return float_NaN; /* report unsupported as NaNs */
+
+   if(aExp == 0x7fff) { // maximum exponent field: infinity or NaN
+       int aSign = extractFloatx80Sign(a);
+
+       if (((uint64_t) (aSig<< 1)) == 0) // nothing set below the top significand bit
+           return (aSign) ? float_negative_inf : float_positive_inf;
+
+       return float_NaN;
+   }
+
+   return float_normalized; // the sign is not reported for normalized values
+}
+
+/*----------------------------------------------------------------------------
+| Compare between two extended precision floating point numbers. Returns
+| 'float_relation_equal' if the operands are equal, 'float_relation_less' if
+| the value 'a' is less than the corresponding value `b',
+| 'float_relation_greater' if the value 'a' is greater than `b', or
+| 'float_relation_unordered' (raising invalid) if either operand is a NaN.
+*----------------------------------------------------------------------------*/
+
+int floatx80_compare(floatx80 a, floatx80 b, float_status_t &status)
+{
+    float_class_t aClass = floatx80_class(a);
+    float_class_t bClass = floatx80_class(b);
+
+    if (aClass == float_NaN || bClass == float_NaN) // floatx80_class also reports unsupported encodings as NaN
+    {
+        float_raise(status, float_flag_invalid);
+        return float_relation_unordered;
+    }
+
+    if (aClass == float_denormal || bClass == float_denormal)
+    {
+        float_raise(status, float_flag_denormal); // flag only; the comparison still proceeds
+    }
+
+    int aSign = extractFloatx80Sign(a);
+    int bSign = extractFloatx80Sign(b);
+
+    if (aClass == float_zero) {
+        if (bClass == float_zero) return float_relation_equal; // the signs of the zeros are ignored
+        return bSign ? float_relation_greater : float_relation_less; // b's sign alone decides
+    }
+
+    if (bClass == float_zero || aSign != bSign) {
+        return aSign ? float_relation_less : float_relation_greater; // a's sign alone decides
+    }
+
+    uint64_t aSig = extractFloatx80Frac(a);
+    int32_t aExp = extractFloatx80Exp(a);
+    uint64_t bSig = extractFloatx80Frac(b);
+    int32_t bExp = extractFloatx80Exp(b);
+
+    if (aClass == float_denormal)
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+
+    if (bClass == float_denormal)
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+
+    if (aExp == bExp && aSig == bSig)
+        return float_relation_equal;
+
+    int less_than = // signs are equal here: order by exponent, then by significand
+        aSign ? ((bExp < aExp) || ((bExp == aExp) && (bSig < aSig))) // both sign bits set
+              : ((aExp < bExp) || ((aExp == bExp) && (aSig < bSig))); // both sign bits clear
+
+    if (less_than) return float_relation_less;
+    return float_relation_greater;
+}
+
+/*----------------------------------------------------------------------------
+| Compare between two extended precision floating point numbers. Returns
+| 'float_relation_equal' if the operands are equal, 'float_relation_less' if
+| the value 'a' is less than the corresponding value `b',
+| 'float_relation_greater' if the value 'a' is greater than `b', or
+| 'float_relation_unordered' if either operand is a NaN.  The invalid flag is
+| raised only for unsupported encodings and signaling NaNs, not quiet NaNs.
+*----------------------------------------------------------------------------*/
+
+int floatx80_compare_quiet(floatx80 a, floatx80 b, float_status_t &status)
+{
+    float_class_t aClass = floatx80_class(a);
+    float_class_t bClass = floatx80_class(b);
+
+    if (aClass == float_NaN || bClass == float_NaN) // floatx80_class also reports unsupported encodings as NaN
+    {
+        if (floatx80_is_unsupported(a) || floatx80_is_unsupported(b))
+            float_raise(status, float_flag_invalid);
+
+        if (floatx80_is_signaling_nan(a) || floatx80_is_signaling_nan(b))
+            float_raise(status, float_flag_invalid);
+
+        return float_relation_unordered; // unordered whether or not invalid was raised
+    }
+
+    if (aClass == float_denormal || bClass == float_denormal)
+    {
+        float_raise(status, float_flag_denormal); // flag only; the comparison still proceeds
+    }
+
+    int aSign = extractFloatx80Sign(a);
+    int bSign = extractFloatx80Sign(b);
+
+    if (aClass == float_zero) {
+        if (bClass == float_zero) return float_relation_equal; // the signs of the zeros are ignored
+        return bSign ? float_relation_greater : float_relation_less; // b's sign alone decides
+    }
+
+    if (bClass == float_zero || aSign != bSign) {
+        return aSign ? float_relation_less : float_relation_greater; // a's sign alone decides
+    }
+
+    uint64_t aSig = extractFloatx80Frac(a);
+    int32_t aExp = extractFloatx80Exp(a);
+    uint64_t bSig = extractFloatx80Frac(b);
+    int32_t bExp = extractFloatx80Exp(b);
+
+    if (aClass == float_denormal)
+        normalizeFloatx80Subnormal(aSig, &aExp, &aSig);
+
+    if (bClass == float_denormal)
+        normalizeFloatx80Subnormal(bSig, &bExp, &bSig);
+
+    if (aExp == bExp && aSig == bSig)
+        return float_relation_equal;
+
+    int less_than = // signs are equal here: order by exponent, then by significand
+        aSign ? ((bExp < aExp) || ((bExp == aExp) && (bSig < aSig))) // both sign bits set
+              : ((aExp < bExp) || ((aExp == bExp) && (aSig < bSig))); // both sign bits clear
+
+    if (less_than) return float_relation_less;
+    return float_relation_greater;
+}
+
+END_SOFTFLOAT_NS