/* This file contains a program for doing 64/64 ==> 64 division, on a machine that does not have that instruction but that does have instructions for "long division" (64/32 ==> 32). Code for unsigned division is given first, followed by a simple program for doing the signed version by using the unsigned version. These programs are useful in implementing "long long" (64-bit) arithmetic on a machine that has the long division instruction. It will work on 64- and 32-bit machines, provided the compiler implements long long's (64-bit integers). It is desirable that the machine have the Count Leading Zeros instruction. In the GNU world, these programs are known as __divdi3 and __udivdi3, and similar names are used here. Max line length is 57, to fit in hacker.book. */ #include /* ----------------------------- nlz64 ------------------------------ */ int nlz64(unsigned long long x) { int n; if (x == 0) return(64); n = 0; if (x <= 0x00000000FFFFFFFF) {n = n + 32; x = x << 32;} if (x <= 0x0000FFFFFFFFFFFF) {n = n + 16; x = x << 16;} if (x <= 0x00FFFFFFFFFFFFFF) {n = n + 8; x = x << 8;} if (x <= 0x0FFFFFFFFFFFFFFF) {n = n + 4; x = x << 4;} if (x <= 0x3FFFFFFFFFFFFFFF) {n = n + 2; x = x << 2;} if (x <= 0x7FFFFFFFFFFFFFFF) {n = n + 1;} return n; } // ---------------------------- udivdi3 -------------------------------- /* The variables u0, u1, etc. take on only 32-bit values, but they are declared long long to avoid some compiler warning messages and to avoid some unnecessary EXTRs that the compiler would put in, to convert long longs to ints. First the procedure takes care of the case in which the divisor is a 32-bit quantity. There are two subcases: (1) If the left half of the dividend is less than the divisor, one execution of DIVU is all that is required (overflow is not possible). (2) Otherwise it does two divisions, using the grade school method, with variables used as suggested below. q1 q0 ________ v) u1 u0 q1*v ____ k u0 */ /* These macros must be used with arguments of the appropriate type (unsigned long long for DIVU and long long for DIVS. They are simulations of the presumed machines ops. I.e., they look at only the low-order 32 bits of the divisor, they return garbage if the division overflows, and they return garbage in the high-order half of the quotient doubleword. In practice, these would be replaced with uses of the machine's DIVU and DIVS instructions (e.g., by using the GNU "asm" facility). */ #define DIVU(u, v) ({unsigned long long __v = (v) & 0xFFFFFFFF; \ long long __q = (u)/(v); \ __q > 0xFFFFFFFF ? 0xdeaddeadbeefbeef : \ __q | 0xdeadbeef00000000;}) #define DIVS(u, v) ({long long __v = (v) << 32 >> 32; \ long long __q = (u)/(__v); \ __q < (long long)0xFFFFFFFF80000000ll || \ __q > (long long)0x000000007FFFFFFFll ? \ 0xfadefadedeafdeaf : __q | 0xfadedeaf00000000;}) // ------------------------------ cut ---------------------------------- unsigned long long udivdi3(unsigned long long u, unsigned long long v) { unsigned long long u0, u1, v1, q0, q1, k, n; if (v >> 32 == 0) { // If v < 2**32: if (u >> 32 < v) // If u/v cannot overflow, return DIVU(u, v) // just do one division. & 0xFFFFFFFF; else { // If u/v would overflow: u1 = u >> 32; // Break u up into two u0 = u & 0xFFFFFFFF; // halves. q1 = DIVU(u1, v) // First quotient digit. & 0xFFFFFFFF; k = u1 - q1*v; // First remainder, < v. q0 = DIVU((k << 32) + u0, v) // 2nd quot. digit. & 0xFFFFFFFF; return (q1 << 32) + q0; } } // Here v >= 2**32. n = nlz64(v); // 0 <= n <= 31. v1 = (v << n) >> 32; // Normalize the divisor // so its MSB is 1. u1 = u >> 1; // To ensure no overflow. q1 = DIVU(u1, v1) // Get quotient from & 0xFFFFFFFF; // divide unsigned insn. q0 = (q1 << n) >> 31; // Undo normalization and // division of u by 2. if (q0 != 0) // Make q0 correct or q0 = q0 - 1; // too small by 1. if ((u - q0*v) >= v) q0 = q0 + 1; // Now q0 is correct. return q0; } // ---------------------------- end cut -------------------------------- /* The next ca. 140 lines is a proof that the udivdi3 code is correct for the case in which v >= 2**32 (the hard case). We'll need these basics (integer variables): floor(floor(a/b)/d) = floor(a/(bd)). (1) b*floor(a/b) = a - rem(a, b). (2) From the first line of this section of the procedure, 0 <= n <= 31. In computing v1, the left shift clearly cannot overflow. Therefore v1 = floor(v/2**(32-n)). u1 = floor(u/2). In computing q1, u1 and v1 are in range for the DIVU instruction and it cannot overflow. Hence q1 = floor(u1/v1). In the first computation of q0, the left shift cannot overflow because q1 < 2**32 (because the maximum value of u1 is 2**63 - 1 and the minimum value of v1 is 2**31). Therefore q0 = floor(q1/2**(31-n)). Now, for the main part of the proof, we want to show that floor(u/v) <= q0 <= floor(u/v) + 1, which is to say, the first computation of q0 is the desired result or is that plus 1. Using (1) twice gives q0 = floor(u/((2**(32-n))v1)) = floor(u/((2**(32-n))floor(v/2**(32-n)))). Using (2) gives q0 = floor(u/(v - rem(v, 2**(32-n))). Using algebra to get this in the form u/v + something: (u u(rem(v, 2**(32-n))) ) q0 = floor(- + ------------------------). (v v(v - rem(v, 2**(32-n)))) This is of the form floor(u/v + delta), and we will now show that delta < 1. delta is largest when rem(v, 2**(32-n)) is as large as possible and, given that, when v is as small as possible. The maximum value of rem(v, 2**(32-n)) is 2**(32-n) - 1. Because v >= 2**(63-n), the smallest value of v having that remainder is 2**(63-n) + 2**(32-n) - 1. Thus u(2**(32-n) - 1) delta <= ------------------------------------- (2**(63-n) + 2**(32-n) - 1)*2**(63-n) u(2**(32-n) - 1) < ---------------- (2**(63-n))**2 By inspection, for n in its range of 0 to 31, delta < u/2**64. Since u is at most 2**64 - 1, delta < 1. Because q0 = floor(u/v + delta) and delta < 1 (and obviously delta >= 0), floor(u/v) <= q0 <= floor(u/v) + 1. To correct this by subtracting 1 when necessary, we would like to code if (u < q0*v) q0 = q0 - 1; (i.e., if the remainder u - q0*v is negative, subtract 1 from q0). However, this doesn't quite work because q0*v can overflow (e.g., for u = 2**64 - 1 and v = 2**32 + 3). Instead, we subtract 1 from q0, so that it is either correct or too SMALL by 1. Then, q0*v will not overflow. We must avoid subtracting 1 if q0 = 0 (if q0 = 0, it is already the correct quotient). Then the final correction is: if ((u - q0*v) >= v) q0 = q0 + 1; To see that this is a valid computation, we already noted that q0*v does not overflow. It is easy to show that 0 <= u - q0*v < 2*v. If v is very large (>= 2**63), can the subtraction overflow? No, because u < 2**64 and q0*v >= 0. Incidentally, there are alternatives to the lines if (q0 != 0) // Make q0 correct or q0 = q0 - 1; // too small by 1. that may be preferable on some machines. One is to replace them with if (q0 == 0) return 0; Another is to place at the beginning of this section of the procedure, or in fact at the beginning of the whole procedure, the line if (u < v) return 0; // Avoid a problem later. These alternatives are preferable if branches are not costly. The way it is coded above is good if the machine's comparison instructions produce a 0/1 integer result. Then, the compiler can change it to, in effect, q0 = q0 - (q0 != 0); (or you can code it that way if your compiler doesn't do this optimization). This is just a compare and subtract on such machines. */ // ------------------------- End Gory Proof ---------------------------- // ----------------------------- divdi3 -------------------------------- /* This routine presumes that smallish cases (those which can be done in one execution of DIVS) are common. If this is not the case, the test for this case should be deleted. Note that the test for when DIVS can be used is not entirely accurate. For example, DIVS is not used if v = 0xFFFFFFFF8000000, whereas if could be (if u is sufficiently small in magnitude). */ // ------------------------------ cut ---------------------------------- #define llabs(x) \ ({unsigned long long t = (x) >> 63; ((x) ^ t) - t;}) long long divdi3(long long u, long long v) { unsigned long long au, av; long long q, t; au = llabs(u); av = llabs(v); if (av >> 31 == 0) { // If |v| < 2**31 and // if (v << 32 >> 32 == v) { // If v is in range and if (au < av << 31) { // |u|/|v| cannot q = DIVS(u, v); // overflow, use DIVS. return (q << 32) >> 32; } } q = au/av; // Invoke udivdi3. t = (u ^ v) >> 63; // If u, v have different return (q ^ t) - t; // signs, negate q. } // ---------------------------- end cut -------------------------------- // ------------------------------ main --------------------------------- int main() { /* First test unsigned division. */ /* The entries in this table are used in all combinations. */ static unsigned long long tabu[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1000, 2003, 32765, 32766, 32767, 32768, 32769, 32760, 65533, 65534, 65535, 65536, 65537, 65538, 0x7ffffffe, 0x7fffffff, 0x80000000, 0x80000001, 0x7000000000000000, 0x7000000080000000, 0x7000000080000001, 0x7fffffffffffffff, 0x7fffffff8fffffff, 0x7fffffff8ffffff1, 0x7fffffff00000000, 0x7fffffff80000000, 0x7fffffff00000001, 0x8000000000000000, 0x8000000080000000, 0x8000000080000001, 0xc000000000000000, 0xc000000080000000, 0xc000000080000001, 0xfffffffffffffffd, 0xfffffffffffffffe, 0xffffffffffffffff, }; /* The entries in this table are used in all combinations, with + and - signs preceding them. */ static long long tabs[] = {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 1000, 2003, 32765, 32766, 32767, 32768, 32769, 32760, 65533, 65534, 65535, 65536, 65537, 65538, 0x7ffffffe, 0x7fffffff, 0x80000000, 0x80000001, 0x7000000000000000, 0x7000000080000000, 0x7000000080000001, 0x7fffffffffffffff, 0x7fffffff8fffffff, 0x7fffffff8ffffff1, 0x7fffffff00000000, 0x7fffffff80000000, 0x7fffffff00000001, 0x8000000000000000, 0x8000000080000000, 0x8000000080000001, 0x0123456789abcdef, 0x00000000abcdef01, 0x0000000012345678, }; int n, i, j, k, errs = 0; unsigned long long uu, vu, qu, ru; long long u, v, q, r; printf("Testing unsigned division.\n"); n = sizeof(tabu)/sizeof(tabu[0]); for (i = 0; i < n; i++) { for (j = 1; j < n; j++) { // Skip tabu[0], which is 0. uu = tabu[i]; vu = tabu[j]; qu = udivdi3(uu, vu); // Call the program being tested. ru = uu - qu*vu; if (qu > uu || ru >= vu) { printf("Error for %016llx/%016llx, got %016llx rem %016llx\n", uu, vu, qu, ru); errs = errs + 1; } } } if (errs == 0) printf("Passed all %d tests (unsigned).\n", n*(n-1)); else printf("Failed %d cases (unsigned).\n", errs); if (errs != 0) return errs; /* Now test signed division. */ printf("Testing signed division.\n"); n = sizeof(tabs)/sizeof(tabs[0]); for (i = 0; i < n; i++) { for (j = 1; j < n; j++) { // Skip tabs[0], which is 0. u = tabs[i]; v = tabs[j]; for (k = 0; k <= 3; k++) { // Do all combinations of +, -. if (k & 1) u = -u; if (k >= 2) v = -v; q = divdi3(u, v); // Call the program being tested. r = u - q*v; if (llabs(q) > llabs(u) || llabs(r) >= llabs(v) || (r != 0 && (r ^ u) < 0)) { printf("Error for %016llx/%016llx, got %016llx rem %016llx\n", u, v, q, r); errs = errs + 1; } } } } if (errs == 0) printf("Passed all %d tests (signed).\n", n*(n-1)*4); else printf("Failed %d cases (signed).\n", errs); return errs; }