Eneboo - Documentación para desarrolladores
|
00001 /* Copyright (C) 2000 MySQL AB 00002 00003 This program is free software; you can redistribute it and/or modify 00004 it under the terms of the GNU General Public License as published by 00005 the Free Software Foundation; either version 2 of the License, or 00006 (at your option) any later version. 00007 00008 This program is distributed in the hope that it will be useful, 00009 but WITHOUT ANY WARRANTY; without even the implied warranty of 00010 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 00011 GNU General Public License for more details. 00012 00013 You should have received a copy of the GNU General Public License 00014 along with this program; if not, write to the Free Software 00015 Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA */ 00016 00017 /* 00018 A better inplementation of the UNIX ctype(3) library. 00019 Notes: my_global.h should be included before ctype.h 00020 */ 00021 00022 #ifndef _m_ctype_h 00023 #define _m_ctype_h 00024 00025 #ifdef __cplusplus 00026 extern "C" { 00027 #endif 00028 00029 #define MY_CS_NAME_SIZE 32 00030 #define MY_CS_CTYPE_TABLE_SIZE 257 00031 #define MY_CS_TO_LOWER_TABLE_SIZE 256 00032 #define MY_CS_TO_UPPER_TABLE_SIZE 256 00033 #define MY_CS_SORT_ORDER_TABLE_SIZE 256 00034 #define MY_CS_TO_UNI_TABLE_SIZE 256 00035 00036 #define CHARSET_DIR "charsets/" 00037 00038 #define my_wc_t ulong 00039 00040 typedef struct unicase_info_st 00041 { 00042 uint16 toupper; 00043 uint16 tolower; 00044 uint16 sort; 00045 } MY_UNICASE_INFO; 00046 00047 #define MY_CS_ILSEQ 0 00048 #define MY_CS_ILUNI 0 00049 #define MY_CS_TOOSMALL -1 00050 #define MY_CS_TOOFEW(n) (-1-(n)) 00051 00052 #define MY_SEQ_INTTAIL 1 00053 #define MY_SEQ_SPACES 2 00054 00055 /* My charsets_list flags */ 00056 #define MY_CS_COMPILED 1 /* compiled-in sets */ 00057 #define MY_CS_CONFIG 2 /* sets that have a *.conf file */ 00058 #define MY_CS_INDEX 4 /* sets listed in the Index file */ 00059 #define MY_CS_LOADED 8 /* sets that are currently loaded */ 00060 #define MY_CS_BINSORT 16 /* if binary sort order */ 00061 #define MY_CS_PRIMARY 32 /* if primary collation */ 00062 #define MY_CS_STRNXFRM 64 /* if strnxfrm is used for sort */ 00063 #define MY_CS_UNICODE 128 /* is a charset is full unicode */ 00064 #define MY_CS_READY 256 /* if a charset is initialized */ 00065 #define MY_CS_AVAILABLE 512 /* If either compiled-in or loaded*/ 00066 #define MY_CS_CSSORT 1024 /* if case sensitive sort order */ 00067 #define MY_CHARSET_UNDEFINED 0 00068 00069 00070 typedef struct my_uni_idx_st 00071 { 00072 uint16 from; 00073 uint16 to; 00074 uchar *tab; 00075 } MY_UNI_IDX; 00076 00077 typedef struct 00078 { 00079 uint beg; 00080 uint end; 00081 uint mblen; 00082 } my_match_t; 00083 00084 enum my_lex_states 00085 { 00086 MY_LEX_START, MY_LEX_CHAR, MY_LEX_IDENT, 00087 MY_LEX_IDENT_SEP, MY_LEX_IDENT_START, 00088 MY_LEX_REAL, MY_LEX_HEX_NUMBER, 00089 MY_LEX_CMP_OP, MY_LEX_LONG_CMP_OP, MY_LEX_STRING, MY_LEX_COMMENT, MY_LEX_END, 00090 MY_LEX_OPERATOR_OR_IDENT, MY_LEX_NUMBER_IDENT, MY_LEX_INT_OR_REAL, 00091 MY_LEX_REAL_OR_POINT, MY_LEX_BOOL, MY_LEX_EOL, MY_LEX_ESCAPE, 00092 MY_LEX_LONG_COMMENT, MY_LEX_END_LONG_COMMENT, MY_LEX_SEMICOLON, 00093 MY_LEX_SET_VAR, MY_LEX_USER_END, MY_LEX_HOSTNAME, MY_LEX_SKIP, 00094 MY_LEX_USER_VARIABLE_DELIMITER, MY_LEX_SYSTEM_VAR, 00095 MY_LEX_IDENT_OR_KEYWORD, 00096 MY_LEX_IDENT_OR_HEX, MY_LEX_IDENT_OR_BIN, MY_LEX_IDENT_OR_NCHAR, 00097 MY_LEX_STRING_OR_DELIMITER 00098 }; 00099 00100 struct charset_info_st; 00101 00102 typedef struct my_collation_handler_st 00103 { 00104 my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint)); 00105 /* Collation routines */ 00106 int (*strnncoll)(struct charset_info_st *, 00107 const uchar *, uint, const uchar *, uint, my_bool); 00108 int (*strnncollsp)(struct charset_info_st *, 00109 const uchar *, uint, const uchar *, uint); 00110 int (*strnxfrm)(struct charset_info_st *, 00111 uchar *, uint, const uchar *, uint); 00112 my_bool (*like_range)(struct charset_info_st *, 00113 const char *s, uint s_length, 00114 pchar w_prefix, pchar w_one, pchar w_many, 00115 uint res_length, 00116 char *min_str, char *max_str, 00117 uint *min_len, uint *max_len); 00118 int (*wildcmp)(struct charset_info_st *, 00119 const char *str,const char *str_end, 00120 const char *wildstr,const char *wildend, 00121 int escape,int w_one, int w_many); 00122 00123 int (*strcasecmp)(struct charset_info_st *, const char *, const char *); 00124 00125 uint (*instr)(struct charset_info_st *, 00126 const char *b, uint b_length, 00127 const char *s, uint s_length, 00128 my_match_t *match, uint nmatch); 00129 00130 /* Hash calculation */ 00131 void (*hash_sort)(struct charset_info_st *cs, const uchar *key, uint len, 00132 ulong *nr1, ulong *nr2); 00133 } MY_COLLATION_HANDLER; 00134 00135 extern MY_COLLATION_HANDLER my_collation_mb_bin_handler; 00136 extern MY_COLLATION_HANDLER my_collation_8bit_bin_handler; 00137 extern MY_COLLATION_HANDLER my_collation_8bit_simple_ci_handler; 00138 extern MY_COLLATION_HANDLER my_collation_ucs2_uca_handler; 00139 00140 00141 typedef struct my_charset_handler_st 00142 { 00143 my_bool (*init)(struct charset_info_st *, void *(*alloc)(uint)); 00144 /* Multibyte routines */ 00145 int (*ismbchar)(struct charset_info_st *, const char *, const char *); 00146 int (*mbcharlen)(struct charset_info_st *, uint); 00147 uint (*numchars)(struct charset_info_st *, const char *b, const char *e); 00148 uint (*charpos)(struct charset_info_st *, const char *b, const char *e, uint pos); 00149 uint (*well_formed_len)(struct charset_info_st *, 00150 const char *b,const char *e, 00151 uint nchars, int *error); 00152 uint (*lengthsp)(struct charset_info_st *, const char *ptr, uint length); 00153 uint (*numcells)(struct charset_info_st *, const char *b, const char *e); 00154 00155 /* Unicode convertion */ 00156 int (*mb_wc)(struct charset_info_st *cs,my_wc_t *wc, 00157 const unsigned char *s,const unsigned char *e); 00158 int (*wc_mb)(struct charset_info_st *cs,my_wc_t wc, 00159 unsigned char *s,unsigned char *e); 00160 00161 /* Functions for case and sort convertion */ 00162 void (*caseup_str)(struct charset_info_st *, char *); 00163 void (*casedn_str)(struct charset_info_st *, char *); 00164 void (*caseup)(struct charset_info_st *, char *, uint); 00165 void (*casedn)(struct charset_info_st *, char *, uint); 00166 00167 /* Charset dependant snprintf() */ 00168 int (*snprintf)(struct charset_info_st *, char *to, uint n, const char *fmt, 00169 ...); 00170 int (*long10_to_str)(struct charset_info_st *, char *to, uint n, int radix, 00171 long int val); 00172 int (*longlong10_to_str)(struct charset_info_st *, char *to, uint n, 00173 int radix, longlong val); 00174 00175 void (*fill)(struct charset_info_st *, char *to, uint len, int fill); 00176 00177 /* String-to-number convertion routines */ 00178 long (*strntol)(struct charset_info_st *, const char *s, uint l, 00179 int base, char **e, int *err); 00180 ulong (*strntoul)(struct charset_info_st *, const char *s, uint l, 00181 int base, char **e, int *err); 00182 longlong (*strntoll)(struct charset_info_st *, const char *s, uint l, 00183 int base, char **e, int *err); 00184 ulonglong (*strntoull)(struct charset_info_st *, const char *s, uint l, 00185 int base, char **e, int *err); 00186 double (*strntod)(struct charset_info_st *, char *s, uint l, char **e, 00187 int *err); 00188 longlong (*my_strtoll10)(struct charset_info_st *cs, 00189 const char *nptr, char **endptr, int *error); 00190 ulong (*scan)(struct charset_info_st *, const char *b, const char *e, 00191 int sq); 00192 } MY_CHARSET_HANDLER; 00193 00194 extern MY_CHARSET_HANDLER my_charset_8bit_handler; 00195 extern MY_CHARSET_HANDLER my_charset_ucs2_handler; 00196 00197 00198 typedef struct charset_info_st 00199 { 00200 uint number; 00201 uint primary_number; 00202 uint binary_number; 00203 uint state; 00204 const char *csname; 00205 const char *name; 00206 const char *comment; 00207 const char *tailoring; 00208 uchar *ctype; 00209 uchar *to_lower; 00210 uchar *to_upper; 00211 uchar *sort_order; 00212 uint16 *contractions; 00213 uint16 **sort_order_big; 00214 uint16 *tab_to_uni; 00215 MY_UNI_IDX *tab_from_uni; 00216 uchar *state_map; 00217 uchar *ident_map; 00218 uint strxfrm_multiply; 00219 uint mbminlen; 00220 uint mbmaxlen; 00221 uint16 min_sort_char; 00222 uint16 max_sort_char; /* For LIKE optimization */ 00223 my_bool escape_with_backslash_is_dangerous; 00224 00225 MY_CHARSET_HANDLER *cset; 00226 MY_COLLATION_HANDLER *coll; 00227 00228 } CHARSET_INFO; 00229 00230 00231 extern CHARSET_INFO my_charset_bin; 00232 extern CHARSET_INFO my_charset_big5_chinese_ci; 00233 extern CHARSET_INFO my_charset_big5_bin; 00234 extern CHARSET_INFO my_charset_cp932_japanese_ci; 00235 extern CHARSET_INFO my_charset_cp932_bin; 00236 extern CHARSET_INFO my_charset_euckr_korean_ci; 00237 extern CHARSET_INFO my_charset_euckr_bin; 00238 extern CHARSET_INFO my_charset_gb2312_chinese_ci; 00239 extern CHARSET_INFO my_charset_gb2312_bin; 00240 extern CHARSET_INFO my_charset_gbk_chinese_ci; 00241 extern CHARSET_INFO my_charset_gbk_bin; 00242 extern CHARSET_INFO my_charset_latin1; 00243 extern CHARSET_INFO my_charset_latin1_german2_ci; 00244 extern CHARSET_INFO my_charset_latin1_bin; 00245 extern CHARSET_INFO my_charset_latin2_czech_ci; 00246 extern CHARSET_INFO my_charset_sjis_japanese_ci; 00247 extern CHARSET_INFO my_charset_sjis_bin; 00248 extern CHARSET_INFO my_charset_tis620_thai_ci; 00249 extern CHARSET_INFO my_charset_tis620_bin; 00250 extern CHARSET_INFO my_charset_ucs2_general_ci; 00251 extern CHARSET_INFO my_charset_ucs2_bin; 00252 extern CHARSET_INFO my_charset_ucs2_general_uca; 00253 extern CHARSET_INFO my_charset_ujis_japanese_ci; 00254 extern CHARSET_INFO my_charset_ujis_bin; 00255 extern CHARSET_INFO my_charset_utf8_general_ci; 00256 extern CHARSET_INFO my_charset_utf8_bin; 00257 extern CHARSET_INFO my_charset_cp1250_czech_ci; 00258 00259 /* declarations for simple charsets */ 00260 extern int my_strnxfrm_simple(CHARSET_INFO *, uchar *, uint, const uchar *, 00261 uint); 00262 extern int my_strnncoll_simple(CHARSET_INFO *, const uchar *, uint, 00263 const uchar *, uint, my_bool); 00264 00265 extern int my_strnncollsp_simple(CHARSET_INFO *, const uchar *, uint, 00266 const uchar *, uint); 00267 00268 extern void my_hash_sort_simple(CHARSET_INFO *cs, 00269 const uchar *key, uint len, 00270 ulong *nr1, ulong *nr2); 00271 00272 extern uint my_lengthsp_8bit(CHARSET_INFO *cs, const char *ptr, uint length); 00273 00274 extern uint my_instr_simple(struct charset_info_st *, 00275 const char *b, uint b_length, 00276 const char *s, uint s_length, 00277 my_match_t *match, uint nmatch); 00278 00279 00280 /* Functions for 8bit */ 00281 extern void my_caseup_str_8bit(CHARSET_INFO *, char *); 00282 extern void my_casedn_str_8bit(CHARSET_INFO *, char *); 00283 extern void my_caseup_8bit(CHARSET_INFO *, char *, uint); 00284 extern void my_casedn_8bit(CHARSET_INFO *, char *, uint); 00285 00286 extern int my_strcasecmp_8bit(CHARSET_INFO * cs, const char *, const char *); 00287 00288 int my_mb_wc_8bit(CHARSET_INFO *cs,my_wc_t *wc, const uchar *s,const uchar *e); 00289 int my_wc_mb_8bit(CHARSET_INFO *cs,my_wc_t wc, uchar *s, uchar *e); 00290 00291 ulong my_scan_8bit(CHARSET_INFO *cs, const char *b, const char *e, int sq); 00292 00293 int my_snprintf_8bit(struct charset_info_st *, char *to, uint n, 00294 const char *fmt, ...); 00295 00296 long my_strntol_8bit(CHARSET_INFO *, const char *s, uint l, int base, 00297 char **e, int *err); 00298 ulong my_strntoul_8bit(CHARSET_INFO *, const char *s, uint l, int base, 00299 char **e, int *err); 00300 longlong my_strntoll_8bit(CHARSET_INFO *, const char *s, uint l, int base, 00301 char **e, int *err); 00302 ulonglong my_strntoull_8bit(CHARSET_INFO *, const char *s, uint l, int base, 00303 char **e, int *err); 00304 double my_strntod_8bit(CHARSET_INFO *, char *s, uint l,char **e, 00305 int *err); 00306 int my_long10_to_str_8bit(CHARSET_INFO *, char *to, uint l, int radix, 00307 long int val); 00308 int my_longlong10_to_str_8bit(CHARSET_INFO *, char *to, uint l, int radix, 00309 longlong val); 00310 00311 longlong my_strtoll10_8bit(CHARSET_INFO *cs, 00312 const char *nptr, char **endptr, int *error); 00313 longlong my_strtoll10_ucs2(CHARSET_INFO *cs, 00314 const char *nptr, char **endptr, int *error); 00315 00316 void my_fill_8bit(CHARSET_INFO *cs, char* to, uint l, int fill); 00317 00318 my_bool my_like_range_simple(CHARSET_INFO *cs, 00319 const char *ptr, uint ptr_length, 00320 pbool escape, pbool w_one, pbool w_many, 00321 uint res_length, 00322 char *min_str, char *max_str, 00323 uint *min_length, uint *max_length); 00324 00325 my_bool my_like_range_mb(CHARSET_INFO *cs, 00326 const char *ptr, uint ptr_length, 00327 pbool escape, pbool w_one, pbool w_many, 00328 uint res_length, 00329 char *min_str, char *max_str, 00330 uint *min_length, uint *max_length); 00331 00332 my_bool my_like_range_ucs2(CHARSET_INFO *cs, 00333 const char *ptr, uint ptr_length, 00334 pbool escape, pbool w_one, pbool w_many, 00335 uint res_length, 00336 char *min_str, char *max_str, 00337 uint *min_length, uint *max_length); 00338 00339 00340 int my_wildcmp_8bit(CHARSET_INFO *, 00341 const char *str,const char *str_end, 00342 const char *wildstr,const char *wildend, 00343 int escape, int w_one, int w_many); 00344 00345 uint my_numchars_8bit(CHARSET_INFO *, const char *b, const char *e); 00346 uint my_numcells_8bit(CHARSET_INFO *, const char *b, const char *e); 00347 uint my_charpos_8bit(CHARSET_INFO *, const char *b, const char *e, uint pos); 00348 uint my_well_formed_len_8bit(CHARSET_INFO *, const char *b, const char *e, 00349 uint pos, int *error); 00350 int my_mbcharlen_8bit(CHARSET_INFO *, uint c); 00351 00352 00353 /* Functions for multibyte charsets */ 00354 extern void my_caseup_str_mb(CHARSET_INFO *, char *); 00355 extern void my_casedn_str_mb(CHARSET_INFO *, char *); 00356 extern void my_caseup_mb(CHARSET_INFO *, char *, uint); 00357 extern void my_casedn_mb(CHARSET_INFO *, char *, uint); 00358 extern int my_strcasecmp_mb(CHARSET_INFO * cs,const char *, const char *); 00359 00360 int my_wildcmp_mb(CHARSET_INFO *, 00361 const char *str,const char *str_end, 00362 const char *wildstr,const char *wildend, 00363 int escape, int w_one, int w_many); 00364 uint my_numchars_mb(CHARSET_INFO *, const char *b, const char *e); 00365 uint my_numcells_mb(CHARSET_INFO *, const char *b, const char *e); 00366 uint my_charpos_mb(CHARSET_INFO *, const char *b, const char *e, uint pos); 00367 uint my_well_formed_len_mb(CHARSET_INFO *, const char *b, const char *e, 00368 uint pos, int *error); 00369 uint my_instr_mb(struct charset_info_st *, 00370 const char *b, uint b_length, 00371 const char *s, uint s_length, 00372 my_match_t *match, uint nmatch); 00373 00374 int my_wildcmp_unicode(CHARSET_INFO *cs, 00375 const char *str, const char *str_end, 00376 const char *wildstr, const char *wildend, 00377 int escape, int w_one, int w_many, 00378 MY_UNICASE_INFO **weights); 00379 00380 extern my_bool my_parse_charset_xml(const char *bug, uint len, 00381 int (*add)(CHARSET_INFO *cs)); 00382 00383 #define _MY_U 01 /* Upper case */ 00384 #define _MY_L 02 /* Lower case */ 00385 #define _MY_NMR 04 /* Numeral (digit) */ 00386 #define _MY_SPC 010 /* Spacing character */ 00387 #define _MY_PNT 020 /* Punctuation */ 00388 #define _MY_CTR 040 /* Control character */ 00389 #define _MY_B 0100 /* Blank */ 00390 #define _MY_X 0200 /* heXadecimal digit */ 00391 00392 00393 #define my_isascii(c) (!((c) & ~0177)) 00394 #define my_toascii(c) ((c) & 0177) 00395 #define my_tocntrl(c) ((c) & 31) 00396 #define my_toprint(c) ((c) | 64) 00397 #define my_toupper(s,c) (char) ((s)->to_upper[(uchar) (c)]) 00398 #define my_tolower(s,c) (char) ((s)->to_lower[(uchar) (c)]) 00399 #define my_isalpha(s, c) (((s)->ctype+1)[(uchar) (c)] & (_MY_U | _MY_L)) 00400 #define my_isupper(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_U) 00401 #define my_islower(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_L) 00402 #define my_isdigit(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_NMR) 00403 #define my_isxdigit(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_X) 00404 #define my_isalnum(s, c) (((s)->ctype+1)[(uchar) (c)] & (_MY_U | _MY_L | _MY_NMR)) 00405 #define my_isspace(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_SPC) 00406 #define my_ispunct(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_PNT) 00407 #define my_isprint(s, c) (((s)->ctype+1)[(uchar) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR | _MY_B)) 00408 #define my_isgraph(s, c) (((s)->ctype+1)[(uchar) (c)] & (_MY_PNT | _MY_U | _MY_L | _MY_NMR)) 00409 #define my_iscntrl(s, c) (((s)->ctype+1)[(uchar) (c)] & _MY_CTR) 00410 00411 /* Some macros that should be cleaned up a little */ 00412 #define my_isvar(s,c) (my_isalnum(s,c) || (c) == '_') 00413 #define my_isvar_start(s,c) (my_isalpha(s,c) || (c) == '_') 00414 00415 #define my_binary_compare(s) ((s)->state & MY_CS_BINSORT) 00416 #define use_strnxfrm(s) ((s)->state & MY_CS_STRNXFRM) 00417 #define my_strnxfrm(s, a, b, c, d) ((s)->coll->strnxfrm((s), (a), (b), (c), (d))) 00418 #define my_strnncoll(s, a, b, c, d) ((s)->coll->strnncoll((s), (a), (b), (c), (d), 0)) 00419 #define my_like_range(s, a, b, c, d, e, f, g, h, i, j) \ 00420 ((s)->coll->like_range((s), (a), (b), (c), (d), (e), (f), (g), (h), (i), (j))) 00421 #define my_wildcmp(cs,s,se,w,we,e,o,m) ((cs)->coll->wildcmp((cs),(s),(se),(w),(we),(e),(o),(m))) 00422 #define my_strcasecmp(s, a, b) ((s)->coll->strcasecmp((s), (a), (b))) 00423 #define my_charpos(cs, b, e, num) (cs)->cset->charpos((cs), (const char*) (b), (const char *)(e), (num)) 00424 00425 00426 #define use_mb(s) ((s)->cset->ismbchar != NULL) 00427 #define my_ismbchar(s, a, b) ((s)->cset->ismbchar((s), (a), (b))) 00428 #ifdef USE_MB 00429 #define my_mbcharlen(s, a) ((s)->cset->mbcharlen((s),(a))) 00430 #else 00431 #define my_mbcharlen(s, a) 1 00432 #endif 00433 00434 #define my_caseup(s, a, l) ((s)->cset->caseup((s), (a), (l))) 00435 #define my_casedn(s, a, l) ((s)->cset->casedn((s), (a), (l))) 00436 #define my_caseup_str(s, a) ((s)->cset->caseup_str((s), (a))) 00437 #define my_casedn_str(s, a) ((s)->cset->casedn_str((s), (a))) 00438 #define my_strntol(s, a, b, c, d, e) ((s)->cset->strntol((s),(a),(b),(c),(d),(e))) 00439 #define my_strntoul(s, a, b, c, d, e) ((s)->cset->strntoul((s),(a),(b),(c),(d),(e))) 00440 #define my_strntoll(s, a, b, c, d, e) ((s)->cset->strntoll((s),(a),(b),(c),(d),(e))) 00441 #define my_strntoull(s, a, b, c,d, e) ((s)->cset->strntoull((s),(a),(b),(c),(d),(e))) 00442 #define my_strntod(s, a, b, c, d) ((s)->cset->strntod((s),(a),(b),(c),(d))) 00443 00444 00445 /* XXX: still need to take care of this one */ 00446 #ifdef MY_CHARSET_TIS620 00447 #error The TIS620 charset is broken at the moment. Tell tim to fix it. 00448 #define USE_TIS620 00449 #include "t_ctype.h" 00450 #endif 00451 00452 #ifdef __cplusplus 00453 } 00454 #endif 00455 00456 #endif /* _m_ctype_h */