Eneboo - Documentación para desarrolladores
|
/* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.63.2.1 2006/05/21 20:05:50 tgl Exp $ */

/*
 * Multibyte / wide-character support declarations: the pg_wchar type,
 * MULE leading-byte constants, the pg_enc encoding identifiers and the
 * prototypes of the encoding helper routines.
 *
 * NOTE: several prototypes below use `bool`, but this header only includes
 * <sys/types.h>.  The including file must make `bool` visible first.
 */

#ifndef PG_WCHAR_H
#define PG_WCHAR_H

#include <sys/types.h>

/*
 * In frontend builds, map the palloc/pfree names onto malloc/free.
 */
#ifdef FRONTEND
#undef palloc
#define palloc malloc
#undef pfree
#define pfree free
#endif

/*
 * The pg_wchar
 */
typedef unsigned int pg_wchar;

/*
 * various definitions for EUC
 */
#define SS2 0x8e				/* single shift 2 (JIS0201) */
#define SS3 0x8f				/* single shift 3 (JIS0212) */

/*
 * SJIS validation macros
 *
 * Both macros evaluate their argument more than once, so do not pass an
 * expression with side effects.
 */
#define ISSJISHEAD(c) (((c) >= 0x81 && (c) <= 0x9f) || ((c) >= 0xe0 && (c) <= 0xfc))
#define ISSJISTAIL(c) (((c) >= 0x40 && (c) <= 0x7e) || ((c) >= 0x80 && (c) <= 0xfc))

/*
 * Leading byte types or leading prefix byte for MULE internal code.
 * See http://www.xemacs.org for more details.  (there is a doc titled
 * "XEmacs Internals Manual", "MULE Character Sets and Encodings"
 * section.)
 *
 * Each IS_* macro casts its argument to unsigned char before comparing and
 * evaluates it more than once.
 */

/*
 * Is a leading byte for "official" single byte encodings?
 */
#define IS_LC1(c)	((unsigned char)(c) >= 0x81 && (unsigned char)(c) <= 0x8d)

/*
 * Is a prefix byte for "private" single byte encodings?
 */
#define IS_LCPRV1(c)	((unsigned char)(c) == 0x9a || (unsigned char)(c) == 0x9b)

/*
 * Is a leading byte for "official" multibyte encodings?
 */
#define IS_LC2(c)	((unsigned char)(c) >= 0x90 && (unsigned char)(c) <= 0x99)

/*
 * Is a prefix byte for "private" multibyte encodings?
 */
#define IS_LCPRV2(c)	((unsigned char)(c) == 0x9c || (unsigned char)(c) == 0x9d)

/*----------------------------------------------------
 * leading characters
 *----------------------------------------------------
 */

/*
 * Official single byte encodings (0x81-0x8e)
 */
#define LC_ISO8859_1	0x81	/* ISO8859 Latin 1 */
#define LC_ISO8859_2	0x82	/* ISO8859 Latin 2 */
#define LC_ISO8859_3	0x83	/* ISO8859 Latin 3 */
#define LC_ISO8859_4	0x84	/* ISO8859 Latin 4 */
#define LC_TIS620		0x85	/* Thai (not supported yet) */
#define LC_ISO8859_7	0x86	/* Greek (not supported yet) */
#define LC_ISO8859_6	0x87	/* Arabic (not supported yet) */
#define LC_ISO8859_8	0x88	/* Hebrew (not supported yet) */
#define LC_JISX0201K	0x89	/* Japanese 1 byte kana */
#define LC_JISX0201R	0x8a	/* Japanese 1 byte Roman */
/* Note that 0x8b seems to be unused as of Emacs 20.7.
 * However, there might be a chance that 0x8b could be used
 * in later version of Emacs.
 *
 * LC_KOI8_R and LC_KOI8_U are both defined as 0x8b, so the two cannot be
 * told apart by leading byte alone.
 */
#define LC_KOI8_R		0x8b	/* Cyrillic KOI8-R */
#define LC_KOI8_U		0x8b	/* Cyrillic KOI8-U */
#define LC_ISO8859_5	0x8c	/* ISO8859 Cyrillic */
#define LC_ISO8859_9	0x8d	/* ISO8859 Latin 5 (not supported yet) */
/* #define FREE			0x8e	free (unused) */

/*
 * Unused
 */
#define CONTROL_1		0x8f	/* control characters (unused) */

/*
 * Official multibyte encodings (0x90-0x99)
 * 0x9a-0x9d are free. 0x9e and 0x9f are reserved.
 */
#define LC_JISX0208_1978	0x90	/* Japanese Kanji, old JIS (not supported) */
#define LC_GB2312_80	0x91	/* Chinese */
#define LC_JISX0208		0x92	/* Japanese Kanji (JIS X 0208) */
#define LC_KS5601		0x93	/* Korean */
#define LC_JISX0212		0x94	/* Japanese Kanji (JIS X 0212) */
#define LC_CNS11643_1	0x95	/* CNS 11643-1992 Plane 1 */
#define LC_CNS11643_2	0x96	/* CNS 11643-1992 Plane 2 */
/* #define FREE			0x97	free (unused) */
#define LC_BIG5_1		0x98	/* Plane 1 Chinese traditional (not supported) */
#define LC_BIG5_2		0x99	/* Plane 1 Chinese traditional (not supported) */

/*
 * Private single byte encodings (0xa0-0xef)
 */
#define LC_SISHENG		0xa0	/* Chinese SiSheng characters for
								 * PinYin/ZhuYin (not supported) */
#define LC_IPA			0xa1	/* IPA (International Phonetic Association)
								 * (not supported) */
#define LC_VISCII_LOWER 0xa2	/* Vietnamese VISCII1.1 lower-case (not
								 * supported) */
#define LC_VISCII_UPPER 0xa3	/* Vietnamese VISCII1.1 upper-case (not
								 * supported) */
#define LC_ARABIC_DIGIT 0xa4	/* Arabic digit (not supported) */
#define LC_ARABIC_1_COLUMN	0xa5	/* Arabic 1-column (not supported) */
#define LC_ASCII_RIGHT_TO_LEFT	0xa6	/* ASCII (left half of ISO8859-1) with
										 * right-to-left direction (not
										 * supported) */
#define LC_LAO			0xa7	/* Lao characters (ISO10646 0E80..0EDF) (not
								 * supported) */
#define LC_ARABIC_2_COLUMN	0xa8	/* Arabic 2-column (not supported) */

/*
 * Private multibyte encodings (0xf0-0xff)
 */
#define LC_INDIAN_1_COLUMN	0xf0	/* Indian charset for 1-column width glyphs
									 * (not supported) */
#define LC_TIBETAN_1_COLUMN 0xf1	/* Tibetan 1 column glyph (not supported) */
#define LC_ETHIOPIC		0xf5	/* Ethiopic characters (not supported) */
#define LC_CNS11643_3	0xf6	/* CNS 11643-1992 Plane 3 */
#define LC_CNS11643_4	0xf7	/* CNS 11643-1992 Plane 4 */
#define LC_CNS11643_5	0xf8	/* CNS 11643-1992 Plane 5 */
#define LC_CNS11643_6	0xf9	/* CNS 11643-1992 Plane 6 */
#define LC_CNS11643_7	0xfa	/* CNS 11643-1992 Plane 7 */
#define LC_INDIAN_2_COLUMN	0xfb	/* Indian charset for 2-column width glyphs
									 * (not supported) */
#define LC_TIBETAN		0xfc	/* Tibetan (not supported) */
/* #define FREE			0xfd	free (unused) */
/* #define FREE			0xfe	free (unused) */
/* #define FREE			0xff	free (unused) */

/*
 * PostgreSQL encoding identifiers
 *
 * WARNING: the order of this table must be same as order
 *			in the pg_enc2name[] (mb/encnames.c) array!
 *
 *			If you add some encoding don't forget to check
 *			PG_ENCODING_BE_LAST macro.
 *
 * The PG_SQL_ASCII is default encoding and must be = 0.
 */
typedef enum pg_enc
{
	PG_SQL_ASCII = 0,			/* SQL/ASCII */
	PG_EUC_JP,					/* EUC for Japanese */
	PG_EUC_CN,					/* EUC for Chinese */
	PG_EUC_KR,					/* EUC for Korean */
	PG_EUC_TW,					/* EUC for Taiwan */
	PG_JOHAB,					/* EUC for Korean JOHAB */
	PG_UTF8,					/* Unicode UTF8 */
	PG_MULE_INTERNAL,			/* Mule internal code */
	PG_LATIN1,					/* ISO-8859-1 Latin 1 */
	PG_LATIN2,					/* ISO-8859-2 Latin 2 */
	PG_LATIN3,					/* ISO-8859-3 Latin 3 */
	PG_LATIN4,					/* ISO-8859-4 Latin 4 */
	PG_LATIN5,					/* ISO-8859-9 Latin 5 */
	PG_LATIN6,					/* ISO-8859-10 Latin6 */
	PG_LATIN7,					/* ISO-8859-13 Latin7 */
	PG_LATIN8,					/* ISO-8859-14 Latin8 */
	PG_LATIN9,					/* ISO-8859-15 Latin9 */
	PG_LATIN10,					/* ISO-8859-16 Latin10 */
	PG_WIN1256,					/* windows-1256 */
	PG_WIN1258,					/* Windows-1258 */
	PG_WIN866,					/* (MS-DOS CP866) */
	PG_WIN874,					/* windows-874 */
	PG_KOI8R,					/* KOI8-R */
	PG_WIN1251,					/* windows-1251 */
	PG_WIN1252,					/* windows-1252 */
	PG_ISO_8859_5,				/* ISO-8859-5 */
	PG_ISO_8859_6,				/* ISO-8859-6 */
	PG_ISO_8859_7,				/* ISO-8859-7 */
	PG_ISO_8859_8,				/* ISO-8859-8 */
	PG_WIN1250,					/* windows-1250 */

	/* the following are client-only encodings */
	PG_SJIS,					/* Shift JIS (Windows-932) */
	PG_BIG5,					/* Big5 (Windows-950) */
	PG_GBK,						/* GBK (Windows-936) */
	PG_UHC,						/* UHC (Windows-949) */
	PG_GB18030,					/* GB18030 */
	_PG_LAST_ENCODING_			/* mark only */

} pg_enc;

/* Last enum member that PG_VALID_BE_ENCODING() accepts. */
#define PG_ENCODING_BE_LAST PG_WIN1250

/*
 * Please use these tests before access to pg_encconv_tbl[]
 * or to other places...
 *
 * Each macro evaluates _enc twice.
 */
#define PG_VALID_BE_ENCODING(_enc) \
		((_enc) >= 0 && (_enc) <= PG_ENCODING_BE_LAST)

#define PG_ENCODING_IS_CLIENT_ONLY(_enc) \
		((_enc) > PG_ENCODING_BE_LAST && (_enc) < _PG_LAST_ENCODING_)

#define PG_VALID_ENCODING(_enc) \
		((_enc) >= 0 && (_enc) < _PG_LAST_ENCODING_)

/* All encodings are valid on the frontend */
#define PG_VALID_FE_ENCODING(_enc)	PG_VALID_ENCODING(_enc)

/*
 * Encoding names with all aliases
 */
typedef struct pg_encname
{
	char	   *name;
	pg_enc		encoding;
} pg_encname;

extern pg_encname pg_encname_tbl[];
extern unsigned int pg_encname_tbl_sz;

/*
 * Careful:
 *
 * if (PG_VALID_ENCODING(encoding))
 *		pg_enc2name_tbl[ encoding ];
 */
typedef struct pg_enc2name
{
	char	   *name;
	pg_enc		encoding;
} pg_enc2name;

extern pg_enc2name pg_enc2name_tbl[];

extern pg_encname *pg_char_to_encname_struct(const char *name);

extern int	pg_char_to_encoding(const char *s);
extern const char *pg_encoding_to_char(int encoding);

/*
 * pg_wchar stuff
 */
typedef int (*mb2wchar_with_len_converter) (const unsigned char *from,
											pg_wchar *to,
											int len);

typedef int (*mblen_converter) (const unsigned char *mbstr);

typedef int (*mbdisplaylen_converter) (const unsigned char *mbstr);

typedef int (*mbverifier) (const unsigned char *mbstr, int len);

typedef struct
{
	mb2wchar_with_len_converter mb2wchar_with_len;	/* convert a multibyte
													 * string to a wchar */
	mblen_converter mblen;		/* get byte length of a char */
	mbdisplaylen_converter dsplen;	/* get display width of a char */
	mbverifier	mbverify;		/* verify multibyte sequence */
	int			maxmblen;		/* max bytes for a char in this encoding */
} pg_wchar_tbl;

extern pg_wchar_tbl pg_wchar_table[];

/*
 * UTF8 to local code conversion map
 */
typedef struct
{
	unsigned int utf;			/* UTF8 */
	unsigned int code;			/* local code */
} pg_utf_to_local;

/*
 * local code to UTF8 conversion map
 */
typedef struct
{
	unsigned int code;			/* local code */
	unsigned int utf;			/* UTF8 */
} pg_local_to_utf;

/*
 * Multibyte and pg_wchar string routines (defined outside this header).
 */
extern int	pg_mb2wchar(const char *from, pg_wchar *to);
extern int	pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len);
extern int	pg_char_and_wchar_strcmp(const char *s1, const pg_wchar *s2);
extern int	pg_wchar_strncmp(const pg_wchar *s1, const pg_wchar *s2, size_t n);
extern int	pg_char_and_wchar_strncmp(const char *s1, const pg_wchar *s2, size_t n);
extern size_t pg_wchar_strlen(const pg_wchar *wstr);
extern int	pg_mblen(const char *mbstr);
extern int	pg_dsplen(const char *mbstr);
extern int	pg_encoding_mblen(int encoding, const char *mbstr);
extern int	pg_encoding_dsplen(int encoding, const char *mbstr);
extern int	pg_encoding_verifymb(int encoding, const char *mbstr, int len);
extern int	pg_mule_mblen(const unsigned char *mbstr);
extern int	pg_mic_mblen(const unsigned char *mbstr);
extern int	pg_mbstrlen(const char *mbstr);
extern int	pg_mbstrlen_with_len(const char *mbstr, int len);
extern int	pg_mbcliplen(const char *mbstr, int len, int limit);
extern int	pg_mbcharcliplen(const char *mbstr, int len, int limit);
extern int	pg_encoding_max_length(int encoding);
extern int	pg_database_encoding_max_length(void);

/*
 * Client encoding state
 */
extern void SetDefaultClientEncoding(void);
extern int	SetClientEncoding(int encoding, bool doit);
extern void InitializeClientEncoding(void);
extern int	pg_get_client_encoding(void);
extern const char *pg_get_client_encoding_name(void);

/*
 * Database encoding state
 */
extern void SetDatabaseEncoding(int encoding);
extern int	GetDatabaseEncoding(void);
extern const char *GetDatabaseEncodingName(void);

extern int	pg_valid_client_encoding(const char *name);
extern int	pg_valid_server_encoding(const char *name);

/*
 * Encoding conversion
 */
extern int	pg_utf_mblen(const unsigned char *mbstr);
extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len,
						  int src_encoding,
						  int dest_encoding);

extern char *pg_client_to_server(const char *s, int len);
extern char *pg_server_to_client(const char *s, int len);

extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc);
extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc);

extern void LocalToUtf(const unsigned char *iso, unsigned char *utf,
		   const pg_local_to_utf *map, int size, int encoding, int len);

extern void UtfToLocal(const unsigned char *utf, unsigned char *iso,
		   const pg_utf_to_local *map, int size, int encoding, int len);

/*
 * Verification and error reporting
 */
extern bool pg_verifymbstr(const char *mbstr, int len, bool noError);
extern bool pg_verify_mbstr(int encoding, const char *mbstr, int len,
				bool noError);

extern void report_invalid_encoding(int encoding, const char *mbstr, int len);
extern void report_untranslatable_char(int src_encoding, int dest_encoding,
						   const char *mbstr, int len);

/*
 * Conversions to and from MULE internal code ("mic")
 */
extern void pg_ascii2mic(const unsigned char *l, unsigned char *p, int len);
extern void pg_mic2ascii(const unsigned char *mic, unsigned char *p, int len);
extern void latin2mic(const unsigned char *l, unsigned char *p, int len,
		  int lc, int encoding);
extern void mic2latin(const unsigned char *mic, unsigned char *p, int len,
		  int lc, int encoding);
extern void latin2mic_with_table(const unsigned char *l, unsigned char *p,
					 int len, int lc, int encoding,
					 const unsigned char *tab);
extern void mic2latin_with_table(const unsigned char *mic, unsigned char *p,
					 int len, int lc, int encoding,
					 const unsigned char *tab);

extern bool pg_utf8_islegal(const unsigned char *source, int length);

#endif   /* PG_WCHAR_H */