// Sanity test for the EUC-JP specific character incrementer.
//
// Every 1-, 2- and 3-byte sequence is fed to both the EUC-JP specific
// incrementer and a generic "bump the last byte" incrementer, and the
// results are compared.
//
//   -v  also display a status line for each invalid source character code.
//   -m  also display the lines where the result of the new incrementer
//       matches the result of the generic incrementer.
//   Every status line is shown when both -v and -m are specified.
//
// `euctest | grep FAILED' shows the codes the new incrementer cannot
// increment: 7f, fefe and 8ffefe, i.e. the last code of each region
// (3 lines).
//
// CAUTION:
//   `euctest'       yields 190 lines.
//   `euctest -m'    yields 17863 lines.
//   `euctest -m -v' yields 16843008 lines.
//
// Sample of output lines:
//   src  => dest result    - status
//   7e   => 7f   successed - Match to generic inc
//   7f   => 7f   FAILED    - Match to generic inc
//   8edf => a1a1 successed - Don't match to generic inc(8edf)
//
// successed/FAILED in the result column shows the return value of the
// character increment function.  The description that follows says
// whether the result of the new incrementer was or wasn't identical to
// the result of the generic incrementer (whose result is appended in
// parentheses when they differ).

#include <stdarg.h>
#include <stdbool.h>
#include <stdio.h>
#include <string.h>

#define SS2 0x8e                /* single shift 2 (JIS0201) */
#define SS3 0x8f                /* single shift 3 (JIS0212) */

#define HIGHBIT                 (0x80)
#define IS_HIGHBIT_SET(ch)      ((unsigned char)(ch) & HIGHBIT)

/* Valid range of the second/third byte of a multibyte EUC-JP character. */
#define IS_EUC_RANGE_VALID(c)   ((c) >= 0xa1 && (c) <= 0xfe)

static bool pg_generic_charinc(unsigned char *charptr, int len);
static bool pg_eucjp_increment(unsigned char *charptr, int length);
static int  pg_eucjp_verifier(const unsigned char *s, int len);
void        do_check(int len, unsigned char *buf, int dispinvalid, int dispmatch);
int         scatf(char *buf, const char *format, ...);

/*
 * Parse the -v / -m switches, then run do_check() over every possible
 * 1-byte, 2-byte and 3-byte sequence.
 */
int
main(int argc, char **argv)
{
    unsigned int    i, j, k;
    unsigned char   buf[3];
    int             argno;
    int             dispinvalid = 0;
    int             dispmatch = 0;

    for (argno = 1; argno < argc; argno++)
    {
        if (strcmp(argv[argno], "-v") == 0)
            dispinvalid = 1;
        if (strcmp(argv[argno], "-m") == 0)
            dispmatch = 1;
    }

    /* single byte characters */
    for (i = 0; i < 256; i++)
    {
        buf[0] = (unsigned char) i;
        do_check(1, buf, dispinvalid, dispmatch);
    }

    /* 2 byte characters */
    for (i = 0; i < 256; i++)
    {
        for (j = 0; j < 256; j++)
        {
            /* do_check() modifies buf in place, so refill it every time. */
            buf[0] = (unsigned char) i;
            buf[1] = (unsigned char) j;
            do_check(2, buf, dispinvalid, dispmatch);
        }
    }

    /* 3 byte characters */
    for (i = 0; i < 256; i++)
    {
        for (j = 0; j < 256; j++)
        {
            for (k = 0; k < 256; k++)
            {
                buf[0] = (unsigned char) i;
                buf[1] = (unsigned char) j;
                buf[2] = (unsigned char) k;
                do_check(3, buf, dispinvalid, dispmatch);
            }
        }
    }

    return 0;
}

/*
 * Check one source character and print a status line when appropriate.
 *
 * len          number of bytes in buf (1 to 3).
 * buf          source character; incremented IN PLACE by the EUC-JP
 *              incrementer.
 * dispinvalid  non-zero to print a line for sources that the verifier
 *              rejects (otherwise they are skipped silently).
 * dispmatch    non-zero to print a line even when the generic incrementer
 *              succeeded and both incrementers produced the same bytes.
 */
void
do_check(int len, unsigned char *buf, int dispinvalid, int dispmatch)
{
    unsigned char   buf2[3];
    char            outbuf[1024];
    int             i, src_is_valid, successed, gensuccessed, match;

    *outbuf = 0;

    src_is_valid = (pg_eucjp_verifier(buf, len) == len);
    if (!src_is_valid)
    {
        if (dispinvalid)
        {
            for (i = 0; i < len; i++)
                scatf(outbuf, "%02x", (unsigned int) buf[i]);
            strcat(outbuf, " - Src char is invalid.");
            puts(outbuf);
        }
        return;
    }

    /* Keep a private copy for the generic incrementer to work on. */
    memcpy(buf2, buf, len);

    for (i = 0; i < len; i++)
        scatf(outbuf, "%02x", (unsigned int) buf[i]);
    strcat(outbuf, " => ");

    successed = pg_eucjp_increment(buf, len);
    gensuccessed = pg_generic_charinc(buf2, len);
    match = (memcmp(buf, buf2, len) == 0);

    /*
     * Uninteresting lines (generic incrementer succeeded and both results
     * agree) are only shown on request.
     */
    if (!gensuccessed || !match || dispmatch)
    {
        for (i = 0; i < len; i++)
            scatf(outbuf, "%02x", (unsigned int) buf[i]);
        scatf(outbuf, " %s - %s",
              (successed ? "successed" : "FAILED"),
              (match ? "Match to generic inc" : "Don't match to generic inc"));
        if (!match)
        {
            /* Append what the generic incrementer left in its buffer. */
            strcat(outbuf, "(");
            for (i = 0; i < len; i++)
                scatf(outbuf, "%02x", (unsigned int) buf2[i]);
            strcat(outbuf, ")");
        }
        puts(outbuf);
    }
}

/*
 * EUC-JP specific character incrementer.
 *
 * Replaces the length-byte character at charptr with the next character
 * and returns true.  Returns false, leaving charptr as it was on entry,
 * when length does not fit the lead byte, when the character is the last
 * one of its region, or when the result does not pass pg_eucjp_verifier().
 */
static bool
pg_eucjp_increment(unsigned char *charptr, int length)
{
    unsigned char   bak[3];
    bool            success;
    unsigned char   c1, c2;
    int             i;          /* must be signed, see the JIS X 0208 loop */

    memcpy(bak, charptr, length);

    c1 = *charptr;

    switch (c1)
    {
        case SS2:               /* JIS X 0201 */
            if (length != 2)
                return false;

            c2 = charptr[1];

            if (c2 > 0xde)
                /* Past the end of this region: move on to a1a1. */
                charptr[0] = charptr[1] = 0xa1;
            else if (c2 < 0xa1)
                charptr[1] = 0xa1;
            else
                charptr[1]++;
            break;

        case SS3:               /* JIS X 0212 */
            if (length != 3)
                return false;

            /*
             * Increment from the last byte, carrying into byte 1 when a
             * byte wraps around.  Byte 0 is the SS3 marker and is never
             * touched, hence the loop stops at i == 0.
             */
            for (i = 2; i > 0; i--)
            {
                c2 = charptr[i];
                if (c2 < 0xa1)
                {
                    charptr[i] = 0xa1;
                    return true;
                }
                else if (c2 < 0xfe)
                {
                    charptr[i]++;
                    break;
                }
                charptr[i] = 0xa1;      /* wrapped; carry to previous byte */
            }

            if (i == 0)         /* Out of 3 byte code region */
            {
                memcpy(charptr, bak, length);
                return false;
            }
            break;

        default:
            if (IS_HIGHBIT_SET(c1))     /* JIS X 0208? */
            {
                if (length != 2)
                    return false;

                /* i reaches -1 when both bytes wrapped: i must be signed */
                for (i = 1; i >= 0; i--)
                {
                    c2 = charptr[i];
                    if (c2 < 0xa1)
                    {
                        charptr[i] = 0xa1;
                        return true;
                    }
                    else if (c2 < 0xfe)
                    {
                        charptr[i]++;
                        break;
                    }
                    charptr[i] = 0xa1;  /* wrapped; carry to previous byte */
                }

                if (i < 0)      /* Out of 2 byte code region */
                {
                    memcpy(charptr, bak, length);
                    return false;
                }
            }
            else
            {
                /* ASCII */
                if (c1 > 0x7e)
                    return false;
                (*charptr)++;
            }
            break;
    }

    /* Check the result with pg_eucjp_verifier as the last resort. */
    success = (pg_eucjp_verifier(charptr, length) == length);
    if (!success)
        memcpy(charptr, bak, length);

    return success;
}

/*
 * Verify the EUC-JP character starting at s.
 *
 * Returns the byte length of the character (1, 2 or 3) when it is valid
 * and fits in len bytes, or -1 otherwise.  The first byte is always read.
 */
static int
pg_eucjp_verifier(const unsigned char *s, int len)
{
    int             l;
    unsigned char   c1, c2;

    c1 = *s++;

    switch (c1)
    {
        case SS2:               /* JIS X 0201 */
            l = 2;
            if (l > len)
                return -1;
            c2 = *s++;
            if (c2 < 0xa1 || c2 > 0xdf)
                return -1;
            break;

        case SS3:               /* JIS X 0212 */
            l = 3;
            if (l > len)
                return -1;
            c2 = *s++;
            if (!IS_EUC_RANGE_VALID(c2))
                return -1;
            c2 = *s++;
            if (!IS_EUC_RANGE_VALID(c2))
                return -1;
            break;

        default:
            if (IS_HIGHBIT_SET(c1))     /* JIS X 0208? */
            {
                l = 2;
                if (l > len)
                    return -1;
                if (!IS_EUC_RANGE_VALID(c1))
                    return -1;
                c2 = *s++;
                if (!IS_EUC_RANGE_VALID(c2))
                    return -1;
            }
            else
                /* must be ASCII */
            {
                l = 1;
            }
            break;
    }

    return l;
}

/*
 * Generic incrementer used as the reference: keep bumping the last byte
 * until the whole character passes pg_eucjp_verifier() with length len.
 *
 * Returns true on success.  When the last byte reaches 255 without
 * producing a valid character, the last byte is restored and false is
 * returned.
 */
static bool
pg_generic_charinc(unsigned char *charptr, int len)
{
    unsigned char  *lastchar = charptr + len - 1;
    unsigned char   savelastchar = *lastchar;

    while (*lastchar < (unsigned char) 255)
    {
        (*lastchar)++;

        /* modified: validate with the local EUC-JP verifier */
        if (pg_eucjp_verifier(charptr, len) != len)
            continue;

        return true;
    }

    *lastchar = savelastchar;
    return false;
}

/*
 * sprintf-style append: format the arguments onto the end of the
 * NUL-terminated string in buf.  No bounds checking is done; the caller
 * must make sure buf has room.  Returns the value of vsprintf().
 */
int
scatf(char *buf, const char *format, ...)
{
    va_list args;
    int     ret;

    va_start(args, format);
    ret = vsprintf(buf + strlen(buf), format, args);
    va_end(args);

    return ret;
}