// $Id: test_hash_2.c 1049 2017-12-24 17:52:23Z fabien $

/*
 * Pseudo-Random Permutation (PRP): the usual approach is a 4 stage
 * (Luby-Rackoff) Feistel network based on some "good" hash function.
 * However this construction only works for even powers of 2.
 * Unbalance is possible as well, but it must still be a power of 2 size.
 * TOO BAD.
 */

#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include <assert.h>
#include <sys/time.h>
//#include <stdint.h>
#include <inttypes.h> // PRI?64
#include <math.h> // pow

// #define DEBUG

#define SEED  999484257552941047LL

typedef enum { false, true } bool;

/* overall parameters
 */
#define N_MASKS 6
#define PRP_ROUNDS 8

// NOT GOOD WITH POWER OF 2
#define ROUNDS 2

static int64_t primes[PRP_ROUNDS] = {
  // a few 25-27 bits primes
  33303727LL,
  49979693LL,
  67867979LL,
  86028157LL,
  104395303LL,
  122949829LL,
  141650963LL,
  160481219LL
};

/* compute largest hierarchical mask to hold 0 .. size-2
 * YES, size-2: If size is a power of 2, we want the half mask
 */
static int64_t compute_mask(const int64_t size)
{
  // 8-bit shortcut up to 2**32
  int64_t m =
    size > 0xffffffffLL ? 0xffffffffLL:
    size > 0xffffffLL ? 0xffffffLL:
    size > 0xffffLL ? 0xffffLL:
    size > 0xffLL ? 0xffLL: 0LL;

  // remaining bits
  do {
    m = (m << 1) | 1LL;
  } while (size-1 > m);

  m >>= 1;

  return m;
}

/* Donald Knuth linear congruential generator */
#define DK_MUL 6364136223846793005LL
#define DK_INC 1442695040888963407LL

/* do not use all small bits */
#define PRP_SHIFT 13

/*
 * Apply pseudo-random permutation with a seed.
 *
 * The key idea behind "pseudo-random"... is that it is not random at all,
 * kind of an oxymoron. So it really means "steered-up permutation", whatever
 * good it may bring.
 *
 * Result in [0, size) is a permutation for inputs in the same set,
 * at least if size is under 112.9E9 (otherwise it is unclear because
 * of int64_t overflow in the multiply-modulo phase).
 *
 * The result are not fully convincing on very small sizes, but this
 * is not the typical use case.
 *
 * Function performance is reasonable.
 *
 * THIS FUNCTION IS ** NOT ** CRYPTOGRAPHICALLY SECURE AT ALL.
 * PLEASE DO NOT USE FOR SUCH PURPOSE.
 */
static int64_t
pseudorandom_perm(const int64_t data, const int64_t size, const int64_t iseed)
{
  assert(size > 0);

  uint64_t seed = iseed;
  uint64_t v = (uint64_t) data % size;

  assert(0 <= v && v < size);

  int64_t mask = compute_mask(size);

  // for small seeds:
  // seed = seed * DK_MUL + DK_INC;

  // fprintf(stderr, "mask=%ld (0x%lx) size=%ld\n",
  //         mask, (uint64_t) mask, size);
  assert(mask < size && size <= 2 * mask + 2);

  for (int i = 0, p = 0; i < ROUNDS; i++, p++)
  {
    // first "half" whitening
    if (v <= mask)
      v ^= (seed >> PRP_SHIFT) & mask;
    seed = seed * DK_MUL + DK_INC;
    // assert(0 <= v && v < size);

    // and second overlapping "half" whitening and reversing
    if (size-v-1 <= mask)
      v = size - mask + ((size - v - 1) ^ ((seed >> PRP_SHIFT) & mask)) - 1;
    seed = seed * DK_MUL + DK_INC;
    // assert(0 <= v && v < size);

    // given the prime sizes, at most 2 primes are skipped for a given size
    while (size % primes[p] == 0)
      p++;

    // scatter
    v = (primes[p] * v + (seed >> PRP_SHIFT)) % size;
    seed = seed * DK_MUL + DK_INC;
    // assert(0 <= v && v < size);

    // bit rotate? bit permute? others?
  }

  /*
  // first "half" whitening
  if (v <= mask)
    v ^= (seed >> PRP_SHIFT) & mask;

  seed = seed * DK_MUL + DK_INC;
  // assert(0 <= v && v < size);

  // and second overlapping "half" whitening and reversing
  if (size-v-1 <= mask)
      v = size - mask + ((size - v - 1) ^ ((seed >> PRP_SHIFT) & mask)) - 1;
  seed = seed * DK_MUL + DK_INC;
  // assert(0 <= v && v < size);
  */

  assert(0 <= v && v < size);
  return v;
}

static void set_unless_set(uint64_t *bitfield, int64_t index, int64_t origin)
{
  int64_t i = index / 64;
  uint64_t b = 1ULL << (index % 64);
  if ((bitfield[i] & b) != 0)
  {
    fprintf(stderr, "PERM ERROR: %ld->%ld already used\n", origin, index);
    abort();
  }
  bitfield[i] |= b;
}

int main(int argc, char* argv[])
{
  if (argc < 2)
  {
    fprintf(stderr, "usage: %s size [seed [loop]]\n", argv[0]);
    exit(1);
  }

  int64_t N = atoll(argv[1]);

  int64_t seed;
  const char * pgh = argc >= 3? argv[2]: getenv("PGBENCH_SEED");

  if (pgh)
  {
    if (strcmp(pgh, "random") == 0 || strcmp(pgh, "rand") == 0)
    {
      // BUG: random always returns 1804289383 if not seeded somehow
      struct timeval tv;
      gettimeofday(&tv, NULL);
      srand(tv.tv_sec ^ tv.tv_usec);
      seed = (((int64_t) rand()) << 32) | rand();
    }
    else
      seed = atoll(pgh);
  }
  else
    seed = SEED;

  int loop = 1;
  if (argc >= 4)
    loop = atoi(argv[3]);

  if (loop > 1)
  {
    assert(N <= 32);
    int count[N][N];
    bzero(count, N*N*sizeof(int));

    for (int l = 0; l < loop; l++)
    {
      for (int64_t i = 0; i < N; i++)
        count[i][pseudorandom_perm(i, N, seed)] += 1;
      seed += 1;
    }

    for (int64_t i = 0; i < N; i++)
    {
      fprintf(stdout, "%ld: ", i);
      for (int j = 0; j < N; j++)
        fprintf(stdout, " %d", count[i][j]);
      fprintf(stdout, "\n");
    }
  }
  else
  {
    fprintf(stdout, "size=%ld seed=0x%"PRIx64" rounds=%d\n", N, seed, ROUNDS);

    // number of words
    int NW = (N+63)/64;
    // uint64_t n[NW];
    uint64_t * n = (uint64_t *) malloc(NW * sizeof(uint64_t));
    assert(n != NULL);
    bzero(n, NW * sizeof(uint64_t));

    // about 4,000,000 computations/s, 0.25 µs/permutation (2.4 GHz cpu)
    for (int64_t i = 0; i < N; i++)
    {
      int64_t j = pseudorandom_perm(i, N, seed);

      // show for small N
      if (i <= 5000)
        fprintf(stdout, "%ld -> %ld\n", i, j);

      // check that it is indeed a permutation
      assert(0 <= j && j < N);
      set_unless_set(n, j, i);
    }

    free(n);
  }

  return 0;
}
