ROW = 8  # number of items per line in the arrays


def list_to_c_array(l, name, eltype):
    """Render the integer sequence *l* as the text of a C array definition.

    The result has the form ``<eltype> <name>[<len(l)>] = { ... };``.  The
    elements are written in lowercase hexadecimal, ROW per line, and each
    line starts with a tab and a comment holding the (hexadecimal) index of
    the first element on that line.

    l      -- sliceable sequence of integers
    name   -- identifier to use for the C array
    eltype -- C element type, inserted verbatim before the name
    """
    lines = []
    for start in range(0, len(l), ROW):
        cells = ["0x{:x}".format(value) for value in l[start:start + ROW]]
        lines.append("\t/* 0x{:x} */ ".format(start) + ", ".join(cells))

    # Lines are separated by ",\n" so the last one carries no trailing comma.
    body = ",\n".join(lines)
    return "{eltype} {name}[{len}] = {{\n{rows}\n}};".format(
        eltype=eltype,
        name=name,
        len=len(l),
        rows=body
    )


def utf32_to_gb(i):
    """Return the GB18030 encoding of code point *i* packed into one integer.

    The encoded bytes are combined big-endian: a one-byte code is returned
    as-is, a two-byte code as ``(b1 << 8) + b2`` and a four-byte code as
    ``(b1 << 24) + (b2 << 16) + (b3 << 8) + b4``.  The character is encoded
    with the 'ignore' error handler, so a code point the codec cannot encode
    produces no bytes at all; in that case 0 is returned.

    Works on both Python 2 and Python 3.

    Raises Exception if the encoder returns a length other than 0, 1, 2 or 4.
    """
    try:
        char = unichr(i)  # Python 2
    except NameError:
        char = chr(i)  # Python 3: chr() covers the whole Unicode range

    # bytearray yields integers when iterated on Python 2 and Python 3 alike,
    # whereas a Python 2 str yields 1-char strings and Python 3 bytes yield
    # ints; this avoids calling ord() on the wrong kind of element.
    encoded = bytearray(char.encode('GB18030', 'ignore'))
    if len(encoded) not in (0, 1, 2, 4):
        raise Exception("Unexpected length: {}".format(len(encoded)))

    packed = 0
    for byte in encoded:
        packed = (packed << 8) + byte
    return packed

def lin_4(s):
    """Return the zero-based linear index of the four-byte sequence *s*.

    The bytes are weighted 12600, 1260, 10 and 1 (i.e. 10 * 126 * 10,
    126 * 10, 10 and 1) and 1687218 is subtracted, which is the weighted sum
    of the bytes 0x81 0x30 0x81 0x30, so that sequence maps to index 0.
    NOTE(review): the weights presumably reflect the GB18030 four-byte layout
    (10 values for the 2nd/4th byte, 126 for the 3rd) -- confirm against the
    standard.

    *s* may be a Python 2 byte string, a Python 3 ``bytes`` object or a text
    string of four characters.  Raises ValueError (from unpacking) if it does
    not contain exactly four elements.
    """
    # Python 3 bytes iterate as ints, everything else as 1-char strings.
    b1, b2, b3, b4 = (b if isinstance(b, int) else ord(b) for b in s)
    return b1 * 12600 + b2 * 1260 + b3 * 10 + b4 - 1687218

def lin_2(s):
    """Return the zero-based linear index of the two-byte sequence *s*.

    The first byte is counted from 0x81.  The second byte is counted from
    0x40, skipping the single value 0x7f (values >= 0x80 are shifted down by
    one), which gives (0xff - 0x80) + (0x7f - 0x40) = 190 positions per
    first byte.  The index is ``first * 190 + second``; 0x81 0x40 maps to 0.

    *s* may be a Python 2 byte string, a Python 3 ``bytes`` object or a text
    string of two characters.  Raises ValueError (from unpacking) if it does
    not contain exactly two elements.
    """
    # Python 3 bytes iterate as ints, everything else as 1-char strings.
    b1, b2 = (b if isinstance(b, int) else ord(b) for b in s)
    if b2 >= 0x80:
        # Close the one-value gap at 0x7f in the second-byte range.
        b2 -= (0x80 - 0x7f)
    b1 -= 0x81
    b2 -= 0x40

    return b1 * ((0xff - 0x80) + (0x7f - 0x40)) + b2

# Build the reverse tables (GB18030 code -> code point) for every code point
# below 0x10000, then print all three tables as C array definitions.

try:
    _unichr = unichr  # Python 2
except NameError:
    _unichr = chr  # Python 3: chr() already returns a text character

# Four-byte codes: entry n holds the code point whose lin_4 index is n.
gb18030_4_to_utf32 = []
# Two-byte codes: indexed by lin_2; slots never assigned below stay 0.
gb18030_2_to_utf32 = [0] * lin_2('\xfe\xff')
for code_point in range(0x10000):
    encoded = _unichr(code_point).encode('GB18030', 'ignore')
    if len(encoded) == 4:
        index = lin_4(encoded)
        # The table is filled by appending, so each new four-byte code must
        # land exactly at the current end of the list.
        if index != len(gb18030_4_to_utf32):
            raise Exception("All 4 byte codes should be sequential")
        gb18030_4_to_utf32.append(code_point)
    elif len(encoded) == 2:
        index = lin_2(encoded)
        # A non-zero slot means another code point already claimed this index.
        if gb18030_2_to_utf32[index]:
            raise Exception("lin_2 {:04x} occurred more than once: {:04x} and {:04x}".format(index, code_point, gb18030_2_to_utf32[index]))
        gb18030_2_to_utf32[index] = code_point

# print is called with a single parenthesised argument so that the statement
# parses and behaves the same under both Python 2 and Python 3.
print(list_to_c_array(
    [utf32_to_gb(i) for i in range(0x10000)],
    name='utf32_to_gb18030',
    eltype='unsigned int'
))
print(list_to_c_array(gb18030_2_to_utf32, name='gb18030_2_to_utf32', eltype='unsigned short'))
print(list_to_c_array(gb18030_4_to_utf32, name='gb18030_4_to_utf32', eltype='unsigned short'))
