/* PRINTENTS.C: Produce a table of entities, with capital and lowercase
   forms, to test capitalization functions */

#include <stdio.h>

#define UCHAR unsigned int

/* CODE TO TAKE BEGINS HERE */

/* This technically should be done with data like that at
   ftp://ftp.unicode.org/Public/UNIDATA/ , at least for values not
   covered here */

/* All macros below are function-like and expand their argument several
   times, so never pass an expression with side effects (e.g. i++). */

/* This function is inclusive of bounds: */
#define CHAR_BETWEEN(_char, _low, _high) \
  (((_char) >= (_low)) && ((_char) <= (_high)))

/* These could be sped up greatly by using ?: */

/* Non-zero when _ch is treated as an uppercase letter.  In the paired
   ranges the parity of the code point selects the case: even values are
   uppercase in the first group, odd values in the second. */
#define XP_IS_UPPERCASE(_ch) \
  ( CHAR_BETWEEN((_ch),'A','Z') || \
    ( CHAR_BETWEEN((_ch),0x00C0,0x00DE) && ((_ch) != 0x00D7)) || \
    ((_ch) == 0x0178 ) || \
    ((!((_ch) & 1)) && ( \
      CHAR_BETWEEN((_ch),0x0100,0x0137) || \
      CHAR_BETWEEN((_ch),0x014A,0x0177) || \
      CHAR_BETWEEN((_ch),0x0182,0x0185) || \
      CHAR_BETWEEN((_ch),0x0198,0x0199) || \
      CHAR_BETWEEN((_ch),0x01A0,0x01A5) || \
      CHAR_BETWEEN((_ch),0x01AC,0x01AD) || \
      CHAR_BETWEEN((_ch),0x01B8,0x01B9) || \
      CHAR_BETWEEN((_ch),0x01BC,0x01BD) || \
      CHAR_BETWEEN((_ch),0x01DE,0x01EF) || \
      CHAR_BETWEEN((_ch),0x01F4,0x0217) || \
      CHAR_BETWEEN((_ch),0x03E2,0x03EF) \
    )) || \
    (((_ch) & 1) && ( \
      CHAR_BETWEEN((_ch),0x0139,0x0148) || \
      CHAR_BETWEEN((_ch),0x0179,0x017E) || \
      CHAR_BETWEEN((_ch),0x0187,0x0188) || \
      CHAR_BETWEEN((_ch),0x018B,0x018C) || \
      CHAR_BETWEEN((_ch),0x0191,0x0192) || \
      CHAR_BETWEEN((_ch),0x01A7,0x01A8) || \
      CHAR_BETWEEN((_ch),0x01AF,0x01B0) || \
      CHAR_BETWEEN((_ch),0x01B3,0x01B6) || \
      CHAR_BETWEEN((_ch),0x01CD,0x01DC) \
    )) || \
    ( CHAR_BETWEEN((_ch),0x0391,0x03AB) && ((_ch) != 0x03A2) ) || \
    0 )

/* Non-zero when _ch is treated as a lowercase letter.  Mirrors
   XP_IS_UPPERCASE with the parity tests swapped. */
#define XP_IS_LOWERCASE(_ch) \
  ( CHAR_BETWEEN((_ch),'a','z') || \
    ( CHAR_BETWEEN((_ch),0x00E0,0x00FE) && ((_ch) != 0x00F7)) || \
    ((_ch) == 0x00FF ) || \
    (((_ch) & 1) && ( \
      CHAR_BETWEEN((_ch),0x0100,0x0137) || \
      CHAR_BETWEEN((_ch),0x014A,0x0177) || \
      CHAR_BETWEEN((_ch),0x0182,0x0185) || \
      CHAR_BETWEEN((_ch),0x0198,0x0199) || \
      CHAR_BETWEEN((_ch),0x01A0,0x01A5) || \
      CHAR_BETWEEN((_ch),0x01AC,0x01AD) || \
      CHAR_BETWEEN((_ch),0x01B8,0x01B9) || \
      CHAR_BETWEEN((_ch),0x01BC,0x01BD) || \
      CHAR_BETWEEN((_ch),0x01DE,0x01EF) || \
      CHAR_BETWEEN((_ch),0x01F4,0x0217) || \
      CHAR_BETWEEN((_ch),0x03E2,0x03EF) \
    )) || \
    ((!((_ch) & 1)) && ( \
      CHAR_BETWEEN((_ch),0x0139,0x0148) || \
      CHAR_BETWEEN((_ch),0x0179,0x017E) || \
      CHAR_BETWEEN((_ch),0x0187,0x0188) || \
      CHAR_BETWEEN((_ch),0x018B,0x018C) || \
      CHAR_BETWEEN((_ch),0x0191,0x0192) || \
      CHAR_BETWEEN((_ch),0x01A7,0x01A8) || \
      CHAR_BETWEEN((_ch),0x01AF,0x01B0) || \
      CHAR_BETWEEN((_ch),0x01B3,0x01B6) || \
      CHAR_BETWEEN((_ch),0x01CD,0x01DC) \
    )) || \
    CHAR_BETWEEN((_ch),0x03B1,0x03CB) || \
    0 )

/* This is slower than it used to be because I changed to +/-32 rather
   than | 32 and & ~32 because of Greek and Cyrillic; you could change
   it back for (_ch) < 0x0100 */

/* Lowercase form of _ch.  Performs no case check of its own: callers
   are expected to test XP_IS_UPPERCASE first.  Outside 0x0100-0x024F
   and 0x03E2-0x03EF the lowercase form is 32 above; inside those ranges
   it is the next code point, except 0x0178 which maps to 0x00FF. */
#define XP_TO_LOWER(_ch) \
  ( (! ( CHAR_BETWEEN((_ch),0x0100,0x024F) || \
         CHAR_BETWEEN((_ch),0x03E2,0x03EF))) \
    ? ((_ch) + 32) \
    : ( ((_ch) == 0x0178) \
        ? (0x00ff) \
        : ((_ch) + 1) \
      ) \
  )

/* Uppercase form of _ch.  Performs no case check of its own: callers
   are expected to test XP_IS_LOWERCASE first.  Special cases: 0x00FF
   maps to 0x0178 and 0x03C2 maps to 0x03A3. */
#define XP_TO_UPPER(_ch) \
  ( (! (CHAR_BETWEEN((_ch),0x0100,0x024F) || \
        CHAR_BETWEEN((_ch),0x03E2,0x03EF))) \
    ? ( ((_ch) == 0x00ff) \
        ? (0x0178) \
        : ( ((_ch) == 0x03C2) \
            ? 0x03A3 \
            : ((_ch) - 32) \
          ) \
      ) \
    : ((_ch) - 1) \
  )

/* CODE TO TAKE ENDS HERE */

/* NOTE(review): the string literals in printrow, printhead and printtail
   look like HTML whose tags were stripped from this copy of the file
   (and, in printtail, broken across physical lines, which is not valid
   inside a C string literal).  The visible text is kept as-is and in
   order; restore the markup from the canonical source -- TODO confirm. */

/* Print one table row for charnum as three numeric character
   references: the character itself, its uppercase form when it tests as
   lowercase (otherwise itself), and its lowercase form when it tests as
   uppercase (otherwise itself). */
void printrow(UCHAR charnum)
{
    /* Every argument has type unsigned int, hence %u rather than %d. */
    printf("&#%u;&#%u;&#%u;\n",
           charnum,
           ( XP_IS_LOWERCASE(charnum) ? XP_TO_UPPER(charnum) : charnum ),
           ( XP_IS_UPPERCASE(charnum) ? XP_TO_LOWER(charnum) : charnum ));
}

/* Print the text that precedes the rows. */
void printhead(void)
{
    printf("\n\nEntities List\n\n\n");
    printf("\n");
}

/* Print the text that follows the rows. */
void printtail(void)
{
    printf("EntityCapitalizedLowercased"
           "\n");
    printf("\n"
           "\"Valid"
           "\n"
           "(Back to\nI18N Testing Information,\nDavid Baron)\n"
           "LDB,\ndbaron@dbaron.org"
           "\n\n");
}

/* Emit the table for code points 32..127 and 161..0x058F. */
int main(void)
{
    UCHAR i;

    printhead();
    for ( i = 32 ; i <= 127 ; i++ ) {
        printrow(i);
    }
    for ( i = 161 ; i < 0x0590 ; i++ ) {
        printrow(i);
    }
    printtail();
    return 0;
}