#include #include #include #include #include #include //#define NULL ((void *) 0) typedef uint64_t uint64; typedef int64_t int64; #define BITS_PER_BITMAPWORD 64 typedef uint64 bitmapword; /* must be an unsigned type */ typedef int64 signedbitmapword; /* must be the matching signed type */ #define WORDNUM(x) ((x) / BITS_PER_BITMAPWORD) #define BITNUM(x) ((x) % BITS_PER_BITMAPWORD) typedef struct Bitmapset { int nwords; /* number of words in array */ bitmapword words[]; /* really [nwords] */ } Bitmapset; static inline int bmw_rightmost_one_pos(uint64 word) { return __builtin_ctzll(word); } // 1. Original version int bms_next_member(const Bitmapset *a, int prevbit) { int nwords; bitmapword mask; if (a == NULL) return -2; nwords = a->nwords; prevbit++; mask = (~(bitmapword) 0) << BITNUM(prevbit); for (int wordnum = WORDNUM(prevbit); wordnum < nwords; wordnum++) { bitmapword w = a->words[wordnum]; /* ignore bits before prevbit */ w &= mask; if (w != 0) { int result; result = wordnum * BITS_PER_BITMAPWORD; result += bmw_rightmost_one_pos(w); return result; } /* in subsequent words, consider all bits */ mask = (~(bitmapword) 0); } return -2; } // 2. David's version int bms_next_member_david(const Bitmapset *a, int prevbit) { int64 currbit; size_t nwords; bitmapword mask; if (a == NULL) return-2; nwords = a->nwords; currbit = (int64) prevbit + 1; mask = (~(bitmapword) 0) << BITNUM(currbit); for (size_t wordnum = WORDNUM(currbit); wordnum < nwords; wordnum++) { bitmapword w = a->words[wordnum]; /* ignore bits before currbit */ w &= mask; if (w != 0) { int result; result = (int) wordnum * BITS_PER_BITMAPWORD; result += bmw_rightmost_one_pos(w); return result; } /* in subsequent words, consider all bits */ mask = (~(bitmapword) 0); } return -2; } // 3. Original version + INT32_MAX check + 64bit int bms_next_member_chao(const Bitmapset *a, int prevbit) { size_t nwords; bitmapword mask; if (a == NULL || prevbit == INT32_MAX) return -2; nwords = (size_t) a->nwords; prevbit++; mask = (~(bitmapword) 0) << BITNUM(prevbit); for (size_t wordnum = WORDNUM(prevbit); wordnum < nwords; wordnum++) { bitmapword w = a->words[wordnum]; /* ignore bits before prevbit */ w &= mask; if (w != 0) { int result; result = (int)wordnum * BITS_PER_BITMAPWORD; result += bmw_rightmost_one_pos(w); return result; } /* in subsequent words, consider all bits */ mask = (~(bitmapword) 0); } return -2; } // 4. Pull up first iteration int bms_next_member_unroll_first(const Bitmapset *a, int prevbit) { uint64 currbit; int wordnum; int nwords; if (a == NULL) return -2; currbit = (int64) prevbit + 1; wordnum = WORDNUM(currbit); nwords = a->nwords; if (wordnum >= nwords) return -2; /* Handle first word with mask */ const bitmapword *p = &a->words[wordnum]; bitmapword w = (*p) & ((~(bitmapword) 0) << BITNUM(currbit)); if (w != 0) return (wordnum * BITS_PER_BITMAPWORD) + bmw_rightmost_one_pos(w); /* The "Tight" Pointer Scan */ const bitmapword *end = &a->words[nwords]; for (p++; p < end; p++) { if (*p != 0) { wordnum = p - a->words; // Pointer arithmetic to get index return (wordnum * BITS_PER_BITMAPWORD) + bmw_rightmost_one_pos(*p); } } return -2; } double get_time() { struct timespec ts; clock_gettime(CLOCK_PROCESS_CPUTIME_ID, &ts); return ts.tv_sec + ts.tv_nsec * 1e-9; } int main() { int words_to_alloc = 1; // Large set to bypass CPU cache slightly Bitmapset *bms = malloc(sizeof(Bitmapset) + words_to_alloc * sizeof(bitmapword)); bms->nwords = words_to_alloc; memset(bms->words, 0, words_to_alloc * sizeof(bitmapword)); double end; int64 count = 0; /* Set a bit far into the set to force a long scan */ bms->words[words_to_alloc - 1] |= 0xaf4; int iterations = 100000000; printf("Benchmarking %d iterations...\n\n", iterations); // Test Original double start = get_time(); for (int i = 0; i < iterations; i++) { int j = -1; while ((j = bms_next_member(bms, j)) >= 0) count++; } end = get_time(); printf("Original: %.5f seconds\n", end - start); // Test Fast start = get_time(); for (int i = 0; i < iterations; i++) { int j = -1; while ((j = bms_next_member_david(bms, j)) >= 0) count++; } end = get_time(); printf("David's: %.5f seconds\n", end - start); // Test Chao's version start = get_time(); for (int i = 0; i < iterations; i++) { int j = -1; while ((j = bms_next_member_chao(bms, j)) >= 0) count++; } end = get_time(); printf("Chao's version: %.5f seconds\n", end - start); // Pull up first iteration start = get_time(); for (int i = 0; i < iterations; i++) { int j = -1; while ((j = bms_next_member_unroll_first(bms, j)) >= 0) count++; } end = get_time(); printf("Unrolled loop: %.5f seconds\n", end - start); printf("%ld\n", count); free(bms); return 0; }