/* Long division, unsigned (64/32 ==> 32). This procedure performs unsigned "long division" i.e., division of a 64-bit unsigned dividend by a 32-bit unsigned divisor, producing a 32-bit quotient. In the overflow cases (divide by 0, or quotient exceeds 32 bits), it returns a remainder of 0xFFFFFFFF (an impossible value). The dividend is u1 and u0, with u1 being the most significant word. The divisor is parameter v. The value returned is the quotient. Max line length is 57, to fit in hacker.book. */ #include #include //To define "exit", req'd by XLC. #define max(x, y) ((x) > (y) ? (x) : (y)) int nlz(unsigned x) { int n; if (x == 0) return(32); n = 0; if (x <= 0x0000FFFF) {n = n +16; x = x <<16;} if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;} if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;} if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;} if (x <= 0x7FFFFFFF) {n = n + 1;} return n; } unsigned myrand(void) { // Expands rand() to 32 bits. return rand() ^ (rand() << 1); } /* This first version is Knuth's Algorithm D (my "divmnu") pretty much mechanically reduced to the special case m = 4, n = 2. But some details differ. The parameters are fullwords passed in registers, and it returns an error indication if the result cannot be expressed in 32 bits (divmnu would return a 3-halfword quotient). */ unsigned divlu(unsigned u1, unsigned u0, unsigned v, unsigned *r) { const unsigned b = 65536; // Number base (16 bits). unsigned short un[4], // Dividend and divisor vn[2]; // normalized and broken // up into halfwords. unsigned short q[2]; // Quotient as halfwords. unsigned un1, un0, // Dividend and divisor vn0; // as fullwords. unsigned qhat; // Estimated quotient digit. unsigned rhat; // A remainder. unsigned p; // Product of two digits. int s, i, j, t, k; if (u1 >= v) { // If overflow, set rem. if (r != NULL) // to an impossible value, *r = 0xFFFFFFFF; // and return the largest return 0xFFFFFFFF;} // possible quotient. s = nlz(v); // 0 <= s <= 31. vn0 = v << s; // Normalize divisor. vn[1] = vn0 >> 16; // Break divisor up into vn[0] = vn0 & 0xFFFF; // two 16-bit halves. // Shift dividend left. un1 = (u1 << s) | (u0 >> 32 - s) & (-s >> 31); un0 = u0 << s; un[3] = un1 >> 16; // Break dividend up into un[2] = un1; // four 16-bit halfwords un[1] = un0 >> 16; // Note: storing into un[0] = un0; // halfwords truncates. for (j = 1; j >= 0; j--) { // Compute estimate qhat of q[j]. qhat = (un[j+2]*b + un[j+1])/vn[1]; rhat = (un[j+2]*b + un[j+1]) - qhat*vn[1]; again: if (qhat >= b || qhat*vn[0] > b*rhat + un[j]) { qhat = qhat - 1; rhat = rhat + vn[1]; if (rhat < b) goto again; } // Multiply and subtract. k = 0; for (i = 0; i < 2; i++) { p = qhat*vn[i]; t = un[i+j] - k - (p & 0xFFFF); un[i+j] = t; k = (p >> 16) - (t >> 16); } t = un[j+2] - k; un[j+2] = t; q[j] = qhat; // Store quotient digit. if (t < 0) { // If we subtracted too q[j] = q[j] - 1; // much, add back. printf("Doing add back, %08x %08x %08x\n", u1, u0, v); k = 0; for (i = 0; i < 2; i++) { t = un[i+j] + vn[i] + k; un[i+j] = t; k = t >> 16; } un[j+2] = un[j+2] + k; } } // End j. if (r != NULL) // If remainder is wanted, *r = (un[1]*b + un[0]) >> s; // return it. return q[1]*b + q[0]; } /* This version is simplified from Knuth's because the adding back step is never required, as the estimated quotient digit qhat is always exact. Also, the inner loop is coded in a more efficient way (not possible in divmnu), unrolled, and obvious resulting simplifications made. And, the quotient and divisor are elementary variables (kept in registers) rather than arrays. */ unsigned divlu1(unsigned u1, unsigned u0, unsigned v, unsigned *r) { const unsigned b = 65536; // Number base (16 bits). unsigned short un[4], // Dividend and divisor vn1, vn0; // normalized and broken // up into halfwords. unsigned q; // Quotient. unsigned un1, un0; // Dividend as fullwords. unsigned qhat; // Estimated quotient digit. unsigned rhat; // A remainder. unsigned p; // Product of two digits. int s, j, t, k; if (u1 >= v) { // If overflow, set rem. if (r != NULL) // to an impossible value, *r = 0xFFFFFFFF; // and return the largest return 0xFFFFFFFF;} // possible quotient. s = nlz(v); // 0 <= s <= 31. v = v << s; // Normalize divisor. vn1 = v >> 16; // Break divisor up into vn0 = v & 0xFFFF; // two 16-bit halves. // Shift dividend left. un1 = (u1 << s) | (u0 >> 32 - s) & (-s >> 31); un0 = u0 << s; un[3] = un1 >> 16; // Break dividend up into un[2] = un1; // four 16-bit halfwords un[1] = un0 >> 16; // Note: storing into un[0] = un0; // halfwords truncates. for (j = 1; j >= 0; j--) { // Compute estimate qhat of current digit of q. qhat = (un[j+2]*b + un[j+1])/vn1; rhat = (un[j+2]*b + un[j+1]) - qhat*vn1; again: if (qhat >= b || qhat*vn0 > b*rhat + un[j]) { qhat = qhat - 1; rhat = rhat + vn1; if (rhat < b) goto again; } q = q*b + qhat; // Put away quotient digit. // Multiply and subtract. p = qhat*vn0; t = un[j] - p; un[j] = t; // Store right half of t. k = t >> 16; p = qhat*vn1; t = un[j+1] + k - p; un[j+1] = t; // Store right half of t. } // End j. if (r != NULL) // If remainder is wanted, *r = (un[1]*b + un[0]) >> s; // return it. return q; } /* This version is divlu1 except with outer loop unrolled, and array un changed into local variables. Several of the variables below could be "short," but having them fullwords gives better code on gcc/Intel. Statistics: Based on one million executions of this program, with uniformly distributed random values for the dividend and divisor, the number of times in each loop per execution of the program is: again1: 0.4060 again2: 0.3469 This is the version that's in the hacker book. */ // ------------------------------ cut ---------------------------------- unsigned divlu2(unsigned u1, unsigned u0, unsigned v, unsigned *r) { const unsigned b = 65536; // Number base (16 bits). unsigned un1, un0, // Norm. dividend LSD's. vn1, vn0, // Norm. divisor digits. q1, q0, // Quotient digits. un32, un21, un10,// Dividend digit pairs. rhat; // A remainder. int s; // Shift amount for norm. if (u1 >= v) { // If overflow, set rem. if (r != NULL) // to an impossible value, *r = 0xFFFFFFFF; // and return the largest return 0xFFFFFFFF;} // possible quotient. s = nlz(v); // 0 <= s <= 31. v = v << s; // Normalize divisor. vn1 = v >> 16; // Break divisor up into vn0 = v & 0xFFFF; // two 16-bit digits. un32 = (u1 << s) | (u0 >> 32 - s) & (-s >> 31); un10 = u0 << s; // Shift dividend left. un1 = un10 >> 16; // Break right half of un0 = un10 & 0xFFFF; // dividend into two digits. q1 = un32/vn1; // Compute the first rhat = un32 - q1*vn1; // quotient digit, q1. again1: if (q1 >= b || q1*vn0 > b*rhat + un1) { q1 = q1 - 1; rhat = rhat + vn1; if (rhat < b) goto again1;} un21 = un32*b + un1 - q1*v; // Multiply and subtract. q0 = un21/vn1; // Compute the second rhat = un21 - q0*vn1; // quotient digit, q0. again2: if (q0 >= b || q0*vn0 > b*rhat + un0) { q0 = q0 - 1; rhat = rhat + vn1; if (rhat < b) goto again2;} if (r != NULL) // If remainder is wanted, *r = (un21*b + un0 - q0*v) >> s; // return it. return q1*b + q0; } // ------------------------------ cut ---------------------------------- int errors; void check(char *name, unsigned q, unsigned r, unsigned u1, unsigned u0, unsigned v, unsigned cq, unsigned cr) { if (q != cq || r != cr) { errors = errors + 1; printf("Error in %s, dividend u = %08x %08x\n", name, u1, u0); printf(" divisor v = %08x\n", v); printf("Got q = %08x, r = %08x\n", q, r); printf("Should get cq = %08x, cr = %08x\n", cq, cr); } return; } int main() { static unsigned test[] = { // u..., v..., 0, 0, 0, // Overflow. 1, 0, 0, // Overflow. 1, 0, 1, // Overflow. 0xffffffff, 0, 0xffffffff, // Overflow. 0, 1, 1, 0, 7, 3, 0x00000000,0xffffffff, 0x00000001, 0x01234567,0x89abcdef, 0x01234568, 0x01234567,0x89abcdef, 0x12345678, 0x01234567,0x89abcdef, 0x80000000, 0x01234567,0x89abcdef, 0x80000001, 0x01234567,0x89abcdef, 0x80007fff, 0x01234567,0x89abcdef, 0x80008000, 0x01234567,0x89abcdef, 0x80008001, 0x01234567,0x89abcdef, 0x8000ffff, 0x00000000,0x00000000, 0x0000ffff, 0x00000000,0x0000ffff, 0x0000ffff, 0x00000000,0xffff0000, 0x0000ffff, 0x00000000,0xffffffff, 0x0000ffff, 0x0000fffe,0x00000000, 0x0000ffff, 0x0000fffe,0x0000ffff, 0x0000ffff, 0x0000fffe,0xffff0000, 0x0000ffff, 0x0000fffe,0xffffffff, 0x0000ffff, 0x00000000,0x00000000, 0xffff0000, 0x00000000,0x0000ffff, 0xffff0000, 0x00000000,0xffff0000, 0xffff0000, 0x00000000,0xffffffff, 0xffff0000, 0x0000ffff,0x00000000, 0xffff0000, 0x0000ffff,0x0000ffff, 0xffff0000, 0x0000ffff,0xffff0000, 0xffff0000, 0x0000ffff,0xffffffff, 0xffff0000, 0xfffe0000,0x00000000, 0xffff0000, 0xfffe0000,0x0000ffff, 0xffff0000, 0xfffe0000,0xffff0000, 0xffff0000, 0xfffe0000,0xffffffff, 0xffff0000, 0xfffeffff,0x00000000, 0xffff0000, 0xfffeffff,0x0000ffff, 0xffff0000, 0xfffeffff,0xffff0000, 0xffff0000, 0xfffeffff,0xffffffff, 0xffff0000, 0x00000000,0x00000000, 0xffffffff, 0x00000000,0x0000ffff, 0xffffffff, 0x00000000,0xffff0000, 0xffffffff, 0x00000000,0xffffffff, 0xffffffff, 0x0000ffff,0x00000000, 0xffffffff, 0x0000ffff,0x0000ffff, 0xffffffff, 0x0000ffff,0xffff0000, 0xffffffff, 0x0000ffff,0xffffffff, 0xffffffff, 0xffff0000,0x00000000, 0xffffffff, 0xffff0000,0x0000ffff, 0xffffffff, 0xffff0000,0xffff0000, 0xffffffff, 0xffff0000,0xffffffff, 0xffffffff, 0xfffffffe,0x00000000, 0xffffffff, 0xfffffffe,0x0000ffff, 0xffffffff, 0xfffffffe,0xffff0000, 0xffffffff, 0xfffffffe,0xffffffff, 0xffffffff, 0x12345678,0x9abcdef0, 0x12345679, 0x00010000,0x00000000, 0x00010001, 0x80008000,0x00000000, 0x80008001, // First qhat = b + 1. 0x8000fffe,0xffffffff, 0x8000ffff, // First qhat = b + 1. 0x7fff8000,0x00000000, 0x8000ffff, // First qhat too big by 2. }; int i, n; unsigned u1, u0, v, q, r, cq, cr; n = sizeof(test)/12 + 10; for (i = 0; i < n; i++) { if (i < sizeof(test)/12) { u1 = test[3*i]; u0 = test[3*i+1]; v = test[3*i+2]; } else { u1 = myrand(); u0 = myrand(); v = myrand(); if (v == 0) v = 1; if (u1 >= v) u1 = u1 & (v - 1); } if (i <= 3) { // If intentional overflow case: cq = 0xFFFFFFFF;; cr = 0xFFFFFFFF; } else { // Non-overflow case: cq = ((unsigned long long)u1*0x100000000LL + u0)/v; cr = ((unsigned long long)u1*0x100000000LL + u0) - (unsigned long long)cq*v; } // printf("u = %08x %08x v = %08x cq = %08x cr = %08x\r\n", u1, u0, v, cq, cr); q = divlu(u1, u0, v, &r); check("divlu", q, r, u1, u0, v, cq, cr); q = divlu1(u1, u0, v, &r); check("divlu1", q, r, u1, u0, v, cq, cr); q = divlu2(u1, u0, v, &r); check("divlu2", q, r, u1, u0, v, cq, cr); } if (errors == 0) printf("Passed all %d cases.\n", n); else printf("Got %d errors out of %d cases.\n", errors, n); }