/* Divide signed by various constants d by multiplying by an
approximate reciprocal of d, computing the remainder, and correcting by
adding r/d.  Max line length is 57, to fit in hacker.book. */

#include <stdio.h>

/* The code below uses the code on pp. 207-208 and Figure 8-2 on p. 174
to do the multiply high signed. 16 ops, including 4 multiplies. This
code is not in the book because it's very similar to the unsigned
version and because the following procedure (divs3b) is better. */

int divs3a(int n) {
   int n0, n1, w0, w1, w2, t, q;

   n0 = n & 0xFFFF;
   n1 = n >> 16;
   w0 = n0*0x5556;
   t  = n1*0x5556 + (w0 >> 16);
   w1 = t & 0xFFFF;
   w2 = t >> 16;
   w1 = n0*0x5555 + w1;
   q = n1*0x5555 + w2 + (w1 >> 16);
   return q - (n >> 31);        // Add 1 if n < 0.
}

/* The code below multiplies by 1/3 (binary 0.010101...) by a series of
shifts right and adds. Each step introduces an error, due to bits being
shifted off the right end of the register. The error is corrected at the
end, by computing the remainder r with respect to the approximate
quotient, and adding r/3. This method is adapted from a suggestion by
GLS for unsigned division by a constant.
   This takes 17 ops including two multiplies. If the multiply by 3 is
done by a shift and add, and alternative 2 (or alternative 1 with the
multiply by 5 changed to a shift and add) is used, it is 20 elementary
instructions. Alternative 2 has a little instruction-level parallelism.
It was found by Aha!.
   A more accurate quotient estimate can be obtained by changing the
first executable line to

                        q = (n >> 1) + (n >> 3);

(which makes q too large by a factor of 2, but it has one more bit of
accuracy), and then inserting just before the assignment to r,

                              q = q >> 1;

With this variation, the remainder is at most 8. However, there does not
seem to be any better code for calculating r/3 with r limited to 8, than
there is for r limited to 14 (4 elementary ops in either case). Thus
using the idea would cost one instruction. */

int divs3b(int n) {
   int q, r;

   n = n + (n>>31 & 2);         // Add 2 if n < 0.
   q = (n >> 2) + (n >> 4);     // q = n*0.0101 (approx).
   q = q + (q >> 4);            // q = n*0.01010101.
   q = q + (q >> 8);
   q = q + (q >> 16);
   r = n - q*3;                 // 0 <= r <= 14.
   if (r < 0) printf("Negative r = %d\n", r);
   return q + (11*r >> 5);      // Returning q + r/3.
// return q + (5*(r + 1) >> 4);         // Alternative 1.
// return q + ((r + 5 + (r << 2)) >> 4);// Alternative 2.
}

/* The code below multiplies by 1/5 (binary 0.00110011...) by the shift
right and add, and then correct, method. 17 ops, including two
multiplies, or 21 elementary operations. 0 <= r <= 24. Not in book. */

int divs5a(int n) {
   int q, r;

   n = n + (n>>31 & 4);         // Add 4 if n < 0.
   q = (n >> 3) + (n >> 4);
   q = q + (q >> 4);
   q = q + (q >> 8);
   q = q + (q >> 16);
   r = n - q*5;
   return q + (13*r >> 6);
}

/* Code below is 18 ops including two multiplies, or 20 elementary ops.
0 <= r <= 9. */

int divs5b(int n) {
   int q, r;

   n = n + (n>>31 & 4);
   q = (n >> 1) + (n >> 2);
   q = q + (q >> 4);
   q = q + (q >> 8);
   q = q + (q >> 16);
   q = q >> 2;
   r = n - q*5;
   return q + (7*r >> 5);
// return q + (r>4) + (r>9);
}

/* The code below takes 17 ops including two multiplies, or 22
elementary ops. 0 <= r <= 29. The 11*r >> 6 can be done in 5 ops.
According to Aha! there are no 4-instruction solutions for this
(computing r/6 for 0 <= r <= 29), although there may be other
5-instruction solutions. Not in book. */

int divs6a(int n) {
   int q, r;

   n = n + (n>>31 & 5);
   q = (n >> 3) + (n >> 5);
   q = q + (q >> 4);
   q = q + (q >> 8);
   q = q + (q >> 16);
   r = n - q*6;
   return q + (11*r >> 6);
}

/* The variation below computes q to two more bits of accuracy than does
divs6a. 0 <= r <= 11. However, the approximation of 3r/16 for r/6 is not
quite good enough; we still have to use 11r/64. Hence this is worse than
divs6a by one instruction. However, the correction to q is at most 1,
which gives two interesting alternatives. If the multiplication by 6 is
done by shifts and adds (3 instructions), alternatives 1 and 2 are 17
and 16 elementary instructions, resp. These alternatives are useful
whenever the approximate quotient is off by at most 1 (i.e., 0 <= r <
2d). 18 ops including one multiply, or 20 elementary ops. */

int divs6b(int n) {
   int q, r;

   n = n + (n>>31 & 5);
   q = (n >> 1) + (n >> 3);
   q = q + (q >> 4);
   q = q + (q >> 8);
   q = q + (q >> 16);
   q = q >> 2;
   r = n - q*6;
   return q + ((r + 2) >> 3);
// return q + (r > 5);
}

/* Code below is 17 ops including two multiplies, or 21 elementary ops.
0 <= r <= 34. Not in book. */

int divs7a(int n) {
   int q, r;

   n = n + (n>>31 & 6);
   q = (n >> 3) + (n >> 6);   // q = n*0.001001 (approx).
   q = q + (q >> 6);          // q = n*0.001001001001.
   q = q + (q >> 12);
   q = q + (q >> 24);
   r = n - q*7;               // 0 <= r <= 34.
   return q + (37*r >> 8);    // Returning q + r/7.
}

/* Code below is 18 ops, including one multiply, or 19 elementary ops.
Uses 4/7 = 0.1001 0010 0100 1001 0010 0100 1001 0010. 0 <= r <= 13. */

int divs7b(int n) {
   int q, r;

   n = n + (n>>31 & 6);
   q = (n >> 1) + (n >> 4);
   q = q + (q >> 6);
   q = q + (q>>12) + (q>>24);
   q = q >> 2;
   r = n - q*7;
   return q + ((r + 1) >> 3);
// return q + (r > 6);
}

/* Code below is uses 8/9 = 0.1110 0011 1000 1110 0011 1000 1110 0011.
20 ops including the multiply, or 21 elementary ops. 0 <= r <= 13. */

int divs9(int n) {
   int q, r;

   n = n + (n>>31 & 8);
   q = (n >> 1) + (n >> 2) +
       (n >> 3);
   q = q + (q >> 6);
   q = q + (q>>12) + (q>>24);
   q = q >> 3;
   r = n - q*9;
   return q + ((r + 7) >> 4);
// return q + (r > 8);
}

/* Code below uses 8/10 = 0.1100 1100 1100 1100 1100 1100 1100 1100.
18 ops including the multiply, or 20 elementary ops. 0 <= r <= 13. */

int divs10(int n) {
   int q, r;

   n = n + (n>>31 & 9);
   q = (n >> 1) + (n >> 2);
   q = q + (q >> 4);
   q = q + (q >> 8);
   q = q + (q >> 16);
   q = q >> 3;
   r = n - q*10;
   return q + ((r + 6) >> 4);
// return q + (r > 9);
}

/* Code below uses 8/11 = 0.1011 1010 0010 1110 1000 1011 1010 0010.
20 ops including the multiply, or 23 elementary ops. 0 <= r <= 15. */

int divs11(int n) {
   int q, r;

   n = n + (n>>31 & 10);
   q = (n >> 1) + (n >> 2) -
       (n >> 5) + (n >> 7);
   q = q + (q >> 10);
   q = q + (q >> 20);
   q = q >> 3;
   r = n - q*11;
   return q + ((r + 5) >> 4);
// return q + (r > 10);
}

/* Code below uses 8/12 = 0.1010 1010 1010 1010 1010 1010 1010 1010.
18 ops including the multiply, or 20 elementary ops. 0 <= r <= 15. */

int divs12(int n) {
   int q, r;

   n = n + (n>>31 & 11);
   q = (n >> 1) + (n >> 3);
   q = q + (q >> 4);
   q = q + (q >> 8);
   q = q + (q >> 16);
   q = q >> 3;
   r = n - q*12;
   return q + ((r + 4) >> 4);
// return q + (r > 11);
}

/* Code below uses 8/13 = 0.1001 1101 1000 1001 1101 1000 1001 1101.
20 ops including the multiply, or 23 elementary ops. 0 <= r <= 20. */

int divs13(int n) {
   int q, r;

   n = n + (n>>31 & 12);
   q = (n>>1) + (n>>4);
   q = q + (q>>4) + (q>>5);
   q = q + (q>>12) + (q>>24);
   q = q >> 3;
   r = n - q*13;
   return q + ((r + 3) >> 4);
// return q + (r > 12);
}

/* Code below uses 64/100 = 0.1010 0011 1101 0111 0000 1010 0011 1101.
24 ops including the multiply, or 28 elementary ops. 0 <= r <= 105. */

int divs100(int n) {
   int q, r;

   n = n + (n>>31 & 99);
   q = (n >> 1) + (n >> 3) + (n >> 6) - (n >> 10) +
       (n >> 12) + (n >> 13) - (n >> 16);
   q = q + (q >> 20);
   q = q >> 6;
   r = n - q*100;
   return q + ((r + 28) >> 7);
// return q + (r > 99);
}

/* Code below uses 1/1000 = 0.0000 0000 0100 0001 1000 1001 0011 0111.
23 ops including two multiplies, or 32 elementary ops. 0 <= r <= 5997.
Not in book. */

int divs1000a(int n) {
   int q, r, t;

   n = n + (n>>31 & 999);
   t = n - (n >> 6) - (n >> 3);
   q = (n >> 10) + (n >> 16) + (n >> 17) + (n >> 21) + (n >> 24) +
       (t >> 26);
   r = n - q*1000;            // 0 <= r <= 5997.
   return q + (4195*r >> 22); // Returning q + r/1000.
// {int t = r + (r << 1);     // Alternative showing
// t = t + (t << 5);          // expansion of multipli-
// t = t + (r << 12);         // cation by 4195.
// return q + (t >> 22);}
}

/* Code below uses 512/1000 = 0.1000 0011 0001 0010 0110 1110 1001 0111.
26 ops including one multiply, or 30 elementary ops. 0 <= r <= 1015. */

int divs1000b(int n) {
   int q, r, t;

   n = n + (n>>31 & 999);
   t = (n >> 7) + (n >> 8) + (n >> 12);
   q = (n >> 1) + t + (n >> 15) + (t >> 11) + (t >> 14) +
       (n >> 26) + (t >> 21);
   q = q >> 9;
   r = n - q*1000;
   return q + ((r + 24) >> 10);
// return q + (r > 999);
}

int errors;
void error(int n, int r) {
   errors = errors + 1;
   printf("Error for n = %08x, got %08x (%d dec)\n", n, r, r);
}

int main() {
   int n;
   int q;
   /* Make low and high equal for an exhaustive test. */
   const int low = -15000000, high = -15000000;

   printf("divs3a:\n");
   n = low; do {
      q = divs3a(n);
      if (q != n/3) error(n, q);
      n = n + 1;
   } while (n != high);

   printf("divs3b:\n");
   n = low; do {
      q = divs3b(n);
      if (q != n/3) error(n, q);
      n = n + 1;
   } while (n != high);

   printf("divs5a:\n");
   n = low; do {
      q = divs5a(n);
      if (q != n/5) error(n, q);
      n = n + 1;
   } while (n != high);

   printf("divs5b:\n");
   n = low; do {
      q = divs5b(n);
      if (q != n/5) error(n, q);
      n = n + 1;
   } while (n != high);

   printf("divs6a:\n");
   n = low; do {
      q = divs6a(n);
      if (q != n/6) error(n, q);
      n = n + 1;
   } while (n != high);

   printf("divs6b:\n");
   n = low; do {
      q = divs6b(n);
      if (q != n/6) error(n, q);
      n = n + 1;
   } while (n != high);

   printf("divs7a:\n");
   n = low; do {
      q = divs7a(n);
      if (q != n/7) error(n, q);
      n = n + 1;
   } while (n != high);

   printf("divs7b:\n");
   n = low; do {
      q = divs7b(n);
      if (q != n/7) error(n, q);
      n = n + 1;
   } while (n != high);

   printf("divs9:\n");
   n = low; do {
      q = divs9(n);
      if (q != n/9) error(n, q);
      n = n + 1;
   } while (n != high);

   printf("divs10:\n");
   n = low; do {
      q = divs10(n);
      if (q != n/10) error(n, q);
      n = n + 1;
   } while (n != high);

   printf("divs11:\n");
   n = low; do {
      q = divs11(n);
      if (q != n/11) error(n, q);
      n = n + 1;
   } while (n != high);

   printf("divs12:\n");
   n = low; do {
      q = divs12(n);
      if (q != n/12) error(n, q);
      n = n + 1;
   } while (n != high);

   printf("divs13:\n");
   n = low; do {
      q = divs13(n);
      if (q != n/13) error(n, q);
      n = n + 1;
   } while (n != high);

   printf("divs100:\n");
   n = low; do {
      q = divs100(n);
      if (q != n/100) error(n, q);
      n = n + 1;
   } while (n != high);

   printf("divs1000a:\n");
   n = low; do {
      q = divs1000a(n);
      if (q != n/1000) error(n, q);
      n = n + 1;
   } while (n != high);

   printf("divs1000b:\n");
   n = low; do {
      q = divs1000b(n);
      if (q != n/1000) error(n, q);
      n = n + 1;
   } while (n != high);

   if (errors == 0)
      printf("Passed all tests.\n");
}