From 08e75e0b5171dca51d35d2aa4f0cadf02628cbe6 Mon Sep 17 00:00:00 2001 From: Hunaid Sohail Date: Thu, 5 Sep 2024 12:54:42 +0500 Subject: [PATCH v2] Add RN/rn support for to_number function --- doc/src/sgml/func.sgml | 11 ++- src/backend/utils/adt/formatting.c | 135 +++++++++++++++++++++++++- src/backend/utils/errcodes.txt | 1 + src/test/regress/expected/numeric.out | 40 ++++++++ src/test/regress/sql/numeric.sql | 10 ++ 5 files changed, 192 insertions(+), 5 deletions(-) diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml index 461fc3f437..e8adf28940 100644 --- a/doc/src/sgml/func.sgml +++ b/doc/src/sgml/func.sgml @@ -8628,7 +8628,7 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); RN - Roman numeral (input between 1 and 3999) + Roman numeral (valid for numbers 1 to 3999) TH or th @@ -8754,6 +8754,15 @@ SELECT regexp_match('abc01234xyz', '(?:(.*?)(\d+)(.*)){1,1}'); (e.g., 9.99EEEE is a valid pattern). + + + + In to_number, RN pattern converts + roman numerals to standard numbers. It is case-insensitive (e.g., 'XIV', + 'xiv', and 'Xiv' are all seen as 14). + When using RN, other format elements are ignored. + + diff --git a/src/backend/utils/adt/formatting.c b/src/backend/utils/adt/formatting.c index 68069fcfd3..fb12bf91d0 100644 --- a/src/backend/utils/adt/formatting.c +++ b/src/backend/utils/adt/formatting.c @@ -49,7 +49,6 @@ * - better number building (formatting) / parsing, now it isn't * ideal code * - use Assert() - * - add support for roman number to standard number conversion * - add support for number spelling * - add support for string to string formatting (we must be better * than Oracle :-), @@ -270,6 +269,29 @@ static const char *const rm100[] = {"C", "CC", "CCC", "CD", "D", "DC", "DCC", "D static const char *const numTH[] = {"ST", "ND", "RD", "TH", NULL}; static const char *const numth[] = {"st", "nd", "rd", "th", NULL}; +/* ---------- + * MACRO: Check if the current and next characters + * form a valid subtraction combination for roman numerals + * ---------- + */ +#define IS_VALID_SUB_COMB(curr, next) \ + (((curr) == 'I' && ((next) == 'V' || (next) == 'X')) || \ + ((curr) == 'X' && ((next) == 'L' || (next) == 'C')) || \ + ((curr) == 'C' && ((next) == 'D' || (next) == 'M'))) + +/* ---------- + * MACRO: Roman number value + * ---------- + */ +#define ROMAN_VAL(r) \ + ((r) == 'I' ? 1 : \ + (r) == 'V' ? 5 : \ + (r) == 'X' ? 10 : \ + (r) == 'L' ? 50 : \ + (r) == 'C' ? 100 : \ + (r) == 'D' ? 500 : \ + (r) == 'M' ? 1000 : 0) + /* ---------- * Flags & Options: * ---------- @@ -1074,6 +1096,7 @@ static bool do_to_timestamp(text *date_txt, text *fmt, Oid collid, bool std, static char *fill_str(char *str, int c, int max); static FormatNode *NUM_cache(int len, NUMDesc *Num, text *pars_str, bool *shouldFree); static char *int_to_roman(int number); +static int roman_to_int(char* s, int len); static void NUM_prepare_locale(NUMProc *Np); static char *get_last_relevant_decnum(char *num); static void NUM_numpart_from_char(NUMProc *Np, int id, int input_len); @@ -5236,6 +5259,99 @@ int_to_roman(int number) } +static int +roman_to_int(char* s, int len) +{ + int repeatCount = 1; + int vCount = 0, lCount = 0, dCount = 0; + bool subtractionEncountered = false; + char lastSubtractedChar = 0; + int total = 0; + + if (len == 0 || len > 15) + return 0; + + for (int i = 0; i < len; ++i) + { + char currChar = toupper(s[i]); + int currValue = ROMAN_VAL(currChar); + + if (currValue == 0) + return 0; + + /* Ensure no character greater than or equal to the subtracted + * character appears after the subtraction. + */ + if (subtractionEncountered && (currValue >= ROMAN_VAL(lastSubtractedChar))) + return 0; + + /* Check for invalid repetitions of characters V, L, or D. */ + if (currChar == 'V') vCount++; + if (currChar == 'L') lCount++; + if (currChar == 'D') dCount++; + if (vCount > 1 || lCount > 1 || dCount > 1) + return 0; + + if (i < len - 1) + { + char nextChar = toupper(s[i + 1]); + int nextValue = ROMAN_VAL(nextChar); + + if (nextValue == 0) + return 0; + + /* If the current value is less than the next value, + * handle subtraction. Verify valid subtractive + * combinations and update the total accordingly. + */ + if (currValue < nextValue) + { + /* Check for invalid repetitions of characters V, L, or D. */ + if (nextChar == 'V') vCount++; + if (nextChar == 'L') lCount++; + if (nextChar == 'D') dCount++; + if (vCount > 1 || lCount > 1 || dCount > 1) + return 0; + + /* for cases where the same character is repeated + * with subtraction. Like 'MCCM' or 'DCCCD'. + */ + if (repeatCount > 1) + return 0; + + if (!IS_VALID_SUB_COMB(currChar, nextChar)) + return 0; + + /* Skip the next character as it is part of + * the subtractive combination. + */ + i++; + repeatCount = 1; + subtractionEncountered = true; + lastSubtractedChar = currChar; + total += (nextValue - currValue); + } + else + { + /* for same characters, check for repetition */ + if (currChar == nextChar) + { + repeatCount++; + if (repeatCount > 3) + return 0; + } + else + repeatCount = 1; + total += currValue; + } + } + /* add the value of the last character */ + else + total += currValue; + } + + return total; +} /* ---------- * Locale @@ -5787,6 +5903,7 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, *Np = &_Np; const char *pattern; int pattern_len; + int roman_result; MemSet(Np, 0, sizeof(NUMProc)); @@ -5817,9 +5934,19 @@ NUM_processor(FormatNode *node, NUMDesc *Num, char *inout, if (IS_ROMAN(Np->Num)) { if (!Np->is_to_char) - ereport(ERROR, - (errcode(ERRCODE_FEATURE_NOT_SUPPORTED), - errmsg("\"RN\" not supported for input"))); + { + roman_result = roman_to_int(inout, input_len); + if (roman_result == 0) + ereport(ERROR, + (errcode(ERRCODE_INVALID_ROMAN_NUMERAL), + errmsg("invalid roman numeral"))); + else + { + Np->Num->pre = sprintf(number, "%d", roman_result); + Np->Num->post = 0; + return number; + } + } Np->Num->lsign = Np->Num->pre_lsign_num = Np->Num->post = Np->Num->pre = Np->out_pre_spaces = Np->sign = 0; diff --git a/src/backend/utils/errcodes.txt b/src/backend/utils/errcodes.txt index b43a24d4bc..a64188da82 100644 --- a/src/backend/utils/errcodes.txt +++ b/src/backend/utils/errcodes.txt @@ -201,6 +201,7 @@ Section: Class 22 - Data Exception 22P03 E ERRCODE_INVALID_BINARY_REPRESENTATION invalid_binary_representation 22P04 E ERRCODE_BAD_COPY_FILE_FORMAT bad_copy_file_format 22P05 E ERRCODE_UNTRANSLATABLE_CHARACTER untranslatable_character +22P07 E ERRCODE_INVALID_ROMAN_NUMERAL invalid_roman_numeral 2200L E ERRCODE_NOT_AN_XML_DOCUMENT not_an_xml_document 2200M E ERRCODE_INVALID_XML_DOCUMENT invalid_xml_document 2200N E ERRCODE_INVALID_XML_CONTENT invalid_xml_content diff --git a/src/test/regress/expected/numeric.out b/src/test/regress/expected/numeric.out index f30ac236f5..d20603afc3 100644 --- a/src/test/regress/expected/numeric.out +++ b/src/test/regress/expected/numeric.out @@ -2297,6 +2297,46 @@ SELECT to_number('42nd', '99th'); 42 (1 row) +SELECT to_number('XIV', 'RN'); + to_number +----------- + 14 +(1 row) + +SELECT to_number('DCCCXLV', 'rn'); + to_number +----------- + 845 +(1 row) + +SELECT to_number('mmxxiv', 'RN'); + to_number +----------- + 2024 +(1 row) + +SELECT to_number('MMMCMXCIX', 'RN'); + to_number +----------- + 3999 +(1 row) + +SELECT to_number('CvIiI', 'rn'); + to_number +----------- + 108 +(1 row) + +SELECT to_number('viv', 'RN'); +ERROR: invalid roman numeral +SELECT to_number('DCCCD', 'RN'); +ERROR: invalid roman numeral +SELECT to_number('XIXL', 'RN'); +ERROR: invalid roman numeral +SELECT to_number('MCCM', 'RN'); +ERROR: invalid roman numeral +SELECT to_number('', 'RN'); +ERROR: invalid roman numeral RESET lc_numeric; -- -- Input syntax diff --git a/src/test/regress/sql/numeric.sql b/src/test/regress/sql/numeric.sql index c86395209a..42331558d1 100644 --- a/src/test/regress/sql/numeric.sql +++ b/src/test/regress/sql/numeric.sql @@ -1070,6 +1070,16 @@ SELECT to_number('$1,234.56','L99,999.99'); SELECT to_number('1234.56','L99,999.99'); SELECT to_number('1,234.56','L99,999.99'); SELECT to_number('42nd', '99th'); +SELECT to_number('XIV', 'RN'); +SELECT to_number('DCCCXLV', 'rn'); +SELECT to_number('mmxxiv', 'RN'); +SELECT to_number('MMMCMXCIX', 'RN'); +SELECT to_number('CvIiI', 'rn'); +SELECT to_number('viv', 'RN'); +SELECT to_number('DCCCD', 'RN'); +SELECT to_number('XIXL', 'RN'); +SELECT to_number('MCCM', 'RN'); +SELECT to_number('', 'RN'); RESET lc_numeric; -- -- 2.34.1