Eneboo - Documentación para desarrolladores
src/libpq/include/mb/pg_wchar.h
Ir a la documentación de este archivo.
00001 /* $PostgreSQL: pgsql/src/include/mb/pg_wchar.h,v 1.63.2.1 2006/05/21 20:05:50 tgl Exp $ */
00002 
00003 #ifndef PG_WCHAR_H
00004 #define PG_WCHAR_H
00005 
00006 #include <sys/types.h>
00007 
/*
 * When FRONTEND is defined, palloc/pfree are redirected to malloc/free by
 * macro substitution.
 */
#if defined(FRONTEND)
#undef palloc
#define palloc malloc
#undef pfree
#define pfree free
#endif   /* FRONTEND */

/*
 * The pg_wchar: the wide-character type used by the declarations in this
 * header.
 */
typedef unsigned pg_wchar;

/*
 * various definitions for EUC
 */
#define SS2 0x8e    /* single shift 2 (JIS0201) */
#define SS3 0x8f    /* single shift 3 (JIS0212) */
00025 
/*
 * SJIS validation macros
 *
 * ISSJISHEAD(c) is true for byte values 0x81-0x9f and 0xe0-0xfc.
 * ISSJISTAIL(c) is true for byte values 0x40-0x7e and 0x80-0xfc.
 *
 * The argument is narrowed to unsigned char before the range tests, the
 * same way the IS_LC* macros in this header do.  Without that, a high byte
 * read through a plain char (signed on many platforms) compares as a
 * negative number and is rejected by every range test.
 *
 * The argument can be evaluated more than once; do not pass expressions
 * with side effects.
 */
#define ISSJISHEAD(c) \
    (((unsigned char) (c) >= 0x81 && (unsigned char) (c) <= 0x9f) || \
     ((unsigned char) (c) >= 0xe0 && (unsigned char) (c) <= 0xfc))
#define ISSJISTAIL(c) \
    (((unsigned char) (c) >= 0x40 && (unsigned char) (c) <= 0x7e) || \
     ((unsigned char) (c) >= 0x80 && (unsigned char) (c) <= 0xfc))
00031 
/*
 * Leading byte types or leading prefix byte for MULE internal code.
 * See http://www.xemacs.org for more details (the "XEmacs Internals
 * Manual", section "MULE Character Sets and Encodings").
 *
 * Every test below narrows its argument to unsigned char before comparing.
 * The argument can be evaluated twice, so do not pass expressions with
 * side effects.
 */

/* Is a leading byte for "official" single byte encodings (0x81-0x8d)? */
#define IS_LC1(c) \
    (0x81 <= (unsigned char) (c) && (unsigned char) (c) <= 0x8d)

/* Is a prefix byte for "private" single byte encodings (0x9a or 0x9b)? */
#define IS_LCPRV1(c) \
    (0x9a == (unsigned char) (c) || 0x9b == (unsigned char) (c))

/* Is a leading byte for "official" multibyte encodings (0x90-0x99)? */
#define IS_LC2(c) \
    (0x90 <= (unsigned char) (c) && (unsigned char) (c) <= 0x99)

/* Is a prefix byte for "private" multibyte encodings (0x9c or 0x9d)? */
#define IS_LCPRV2(c) \
    (0x9c == (unsigned char) (c) || 0x9d == (unsigned char) (c))
00054 
/*----------------------------------------------------
 * leading characters
 *----------------------------------------------------
 */

/*
 * Official single byte encodings (0x81-0x8e)
 */
#define LC_ISO8859_1  0x81  /* ISO8859 Latin 1 */
#define LC_ISO8859_2  0x82  /* ISO8859 Latin 2 */
#define LC_ISO8859_3  0x83  /* ISO8859 Latin 3 */
#define LC_ISO8859_4  0x84  /* ISO8859 Latin 4 */
#define LC_TIS620     0x85  /* Thai (not supported yet) */
#define LC_ISO8859_7  0x86  /* Greek (not supported yet) */
#define LC_ISO8859_6  0x87  /* Arabic (not supported yet) */
#define LC_ISO8859_8  0x88  /* Hebrew (not supported yet) */
#define LC_JISX0201K  0x89  /* Japanese 1 byte kana */
#define LC_JISX0201R  0x8a  /* Japanese 1 byte Roman */

/*
 * Note that 0x8b seems to be unused as of Emacs 20.7.  However, there
 * might be a chance that 0x8b could be used in a later version of Emacs.
 * Both KOI8 variants below are defined with that same value.
 */
#define LC_KOI8_R     0x8b  /* Cyrillic KOI8-R */
#define LC_KOI8_U     0x8b  /* Cyrillic KOI8-U */

#define LC_ISO8859_5  0x8c  /* ISO8859 Cyrillic */
#define LC_ISO8859_9  0x8d  /* ISO8859 Latin 5 (not supported yet) */
/* 0x8e is free (unused) */

/*
 * Unused
 */
#define CONTROL_1     0x8f  /* control characters (unused) */
00087 
/*
 * Official multibyte encodings (0x90-0x99)
 *
 * 0x9a-0x9d have no official charset assigned here (IS_LCPRV1 and
 * IS_LCPRV2 test for them as private-encoding prefix bytes).
 * 0x9e and 0x9f are reserved.
 */
#define LC_JISX0208_1978  0x90  /* Japanese Kanji, old JIS (not supported) */
#define LC_GB2312_80      0x91  /* Chinese */
#define LC_JISX0208       0x92  /* Japanese Kanji (JIS X 0208) */
#define LC_KS5601         0x93  /* Korean */
#define LC_JISX0212       0x94  /* Japanese Kanji (JIS X 0212) */
#define LC_CNS11643_1     0x95  /* CNS 11643-1992 Plane 1 */
#define LC_CNS11643_2     0x96  /* CNS 11643-1992 Plane 2 */
/* 0x97 is free (unused) */
#define LC_BIG5_1         0x98  /* Plane 1 Chinese traditional (not supported) */
#define LC_BIG5_2         0x99  /* Plane 2 Chinese traditional (not supported) */
00103 
/*
 * Private single byte encodings (0xa0-0xef)
 *
 * Every charset in this group is marked "not supported".
 */

/* Chinese SiSheng characters for PinYin/ZhuYin */
#define LC_SISHENG              0xa0
/* IPA (International Phonetic Association) */
#define LC_IPA                  0xa1
/* Vietnamese VISCII1.1 lower-case */
#define LC_VISCII_LOWER         0xa2
/* Vietnamese VISCII1.1 upper-case */
#define LC_VISCII_UPPER         0xa3
/* Arabic digit */
#define LC_ARABIC_DIGIT         0xa4
/* Arabic 1-column */
#define LC_ARABIC_1_COLUMN      0xa5
/* ASCII (left half of ISO8859-1) with right-to-left direction */
#define LC_ASCII_RIGHT_TO_LEFT  0xa6
/* Lao characters (ISO10646 0E80..0EDF) */
#define LC_LAO                  0xa7
/* Arabic 2-column */
#define LC_ARABIC_2_COLUMN      0xa8
00123 
/*
 * Private multibyte encodings (0xf0-0xff)
 */

/* Indian charset for 1-column width glyphs (not supported) */
#define LC_INDIAN_1_COLUMN   0xf0
/* Tibetan 1 column glyph (not supported) */
#define LC_TIBETAN_1_COLUMN  0xf1
/* Ethiopic characters (not supported) */
#define LC_ETHIOPIC          0xf5
#define LC_CNS11643_3        0xf6   /* CNS 11643-1992 Plane 3 */
#define LC_CNS11643_4        0xf7   /* CNS 11643-1992 Plane 4 */
#define LC_CNS11643_5        0xf8   /* CNS 11643-1992 Plane 5 */
#define LC_CNS11643_6        0xf9   /* CNS 11643-1992 Plane 6 */
#define LC_CNS11643_7        0xfa   /* CNS 11643-1992 Plane 7 */
/* Indian charset for 2-column width glyphs (not supported) */
#define LC_INDIAN_2_COLUMN   0xfb
/* Tibetan (not supported) */
#define LC_TIBETAN           0xfc
/* 0xfd, 0xfe and 0xff are free (unused) */
00142 
/*
 * PostgreSQL encoding identifiers
 *
 * WARNING: the order of this table must be the same as the order
 *          in the pg_enc2name[] (mb/encnames.c) array!
 *
 *          If you add an encoding, don't forget to check the
 *          PG_ENCODING_BE_LAST macro.
 *
 * PG_SQL_ASCII is the default encoding and must be = 0.
 */
typedef enum pg_enc
{
    PG_SQL_ASCII = 0,       /* SQL/ASCII */
    PG_EUC_JP,              /* EUC for Japanese */
    PG_EUC_CN,              /* EUC for Chinese */
    PG_EUC_KR,              /* EUC for Korean */
    PG_EUC_TW,              /* EUC for Taiwan */
    PG_JOHAB,               /* EUC for Korean JOHAB */
    PG_UTF8,                /* Unicode UTF8 */
    PG_MULE_INTERNAL,       /* Mule internal code */
    PG_LATIN1,              /* ISO-8859-1 Latin 1 */
    PG_LATIN2,              /* ISO-8859-2 Latin 2 */
    PG_LATIN3,              /* ISO-8859-3 Latin 3 */
    PG_LATIN4,              /* ISO-8859-4 Latin 4 */
    PG_LATIN5,              /* ISO-8859-9 Latin 5 */
    PG_LATIN6,              /* ISO-8859-10 Latin6 */
    PG_LATIN7,              /* ISO-8859-13 Latin7 */
    PG_LATIN8,              /* ISO-8859-14 Latin8 */
    PG_LATIN9,              /* ISO-8859-15 Latin9 */
    PG_LATIN10,             /* ISO-8859-16 Latin10 */
    PG_WIN1256,             /* windows-1256 */
    PG_WIN1258,             /* Windows-1258 */
    PG_WIN866,              /* (MS-DOS CP866) */
    PG_WIN874,              /* windows-874 */
    PG_KOI8R,               /* KOI8-R */
    PG_WIN1251,             /* windows-1251 */
    PG_WIN1252,             /* windows-1252 */
    PG_ISO_8859_5,          /* ISO-8859-5 */
    PG_ISO_8859_6,          /* ISO-8859-6 */
    PG_ISO_8859_7,          /* ISO-8859-7 */
    PG_ISO_8859_8,          /* ISO-8859-8 */
    PG_WIN1250,             /* windows-1250 */

    /* the following are for client encoding only */
    PG_SJIS,                /* Shift JIS (Windows-932) */
    PG_BIG5,                /* Big5 (Windows-950) */
    PG_GBK,                 /* GBK (Windows-936) */
    PG_UHC,                 /* UHC (Windows-949) */
    PG_GB18030,             /* GB18030 */
    _PG_LAST_ENCODING_      /* mark only */
} pg_enc;

/* Last enumerator accepted by PG_VALID_BE_ENCODING. */
#define PG_ENCODING_BE_LAST PG_WIN1250

/*
 * Please use these tests before access to pg_encconv_tbl[]
 * or to other places...
 *
 * Each macro can evaluate its argument twice.
 */

/* 0 .. PG_ENCODING_BE_LAST inclusive */
#define PG_VALID_BE_ENCODING(_enc) \
    (0 <= (_enc) && (_enc) <= PG_ENCODING_BE_LAST)

/* strictly between PG_ENCODING_BE_LAST and _PG_LAST_ENCODING_ */
#define PG_ENCODING_IS_CLIENT_ONLY(_enc) \
    (PG_ENCODING_BE_LAST < (_enc) && (_enc) < _PG_LAST_ENCODING_)

/* 0 .. _PG_LAST_ENCODING_ exclusive */
#define PG_VALID_ENCODING(_enc) \
    (0 <= (_enc) && (_enc) < _PG_LAST_ENCODING_)

/* On FE are possible all encodings */
#define PG_VALID_FE_ENCODING(_enc)  PG_VALID_ENCODING(_enc)
00214 
00215 /*
00216  * Encoding names with all aliases
00217  */
00218 typedef struct pg_encname
00219 {
00220         char       *name;
00221         pg_enc          encoding;
00222 } pg_encname;
00223 
00224 extern pg_encname pg_encname_tbl[];
00225 extern unsigned int pg_encname_tbl_sz;
00226 
00227 /*
00228  * Careful:
00229  *
00230  * if (PG_VALID_ENCODING(encoding))
00231  *              pg_enc2name_tbl[ encoding ];
00232  */
00233 typedef struct pg_enc2name
00234 {
00235         char       *name;
00236         pg_enc          encoding;
00237 } pg_enc2name;
00238 
00239 extern pg_enc2name pg_enc2name_tbl[];
00240 
00241 extern pg_encname *pg_char_to_encname_struct(const char *name);
00242 
00243 extern int      pg_char_to_encoding(const char *s);
00244 extern const char *pg_encoding_to_char(int encoding);
00245 
00246 /*
00247  * pg_wchar stuff
00248  */
00249 typedef int (*mb2wchar_with_len_converter) (const unsigned char *from,
00250                                                                                                                 pg_wchar *to,
00251                                                                                                                 int len);
00252 
00253 typedef int (*mblen_converter) (const unsigned char *mbstr);
00254 
00255 typedef int (*mbdisplaylen_converter) (const unsigned char *mbstr);
00256 
00257 typedef int (*mbverifier) (const unsigned char *mbstr, int len);
00258 
00259 typedef struct
00260 {
00261         mb2wchar_with_len_converter mb2wchar_with_len;          /* convert a multibyte
00262                                                                                                                  * string to a wchar */
00263         mblen_converter mblen;          /* get byte length of a char */
00264         mbdisplaylen_converter dsplen;          /* get display width of a char */
00265         mbverifier      mbverify;               /* verify multibyte sequence */
00266         int                     maxmblen;               /* max bytes for a char in this encoding */
00267 } pg_wchar_tbl;
00268 
00269 extern pg_wchar_tbl pg_wchar_table[];
00270 
/*
 * UTF8 to local code conversion map entry (utf first, then code)
 */
typedef struct
{
    unsigned utf;       /* UTF8 */
    unsigned code;      /* local code */
} pg_utf_to_local;

/*
 * local code to UTF8 conversion map entry (code first, then utf)
 */
typedef struct
{
    unsigned code;      /* local code */
    unsigned utf;       /* UTF8 */
} pg_local_to_utf;
00288 
00289 extern int      pg_mb2wchar(const char *from, pg_wchar *to);
00290 extern int      pg_mb2wchar_with_len(const char *from, pg_wchar *to, int len);
00291 extern int      pg_char_and_wchar_strcmp(const char *s1, const pg_wchar *s2);
00292 extern int      pg_wchar_strncmp(const pg_wchar *s1, const pg_wchar *s2, size_t n);
00293 extern int      pg_char_and_wchar_strncmp(const char *s1, const pg_wchar *s2, size_t n);
00294 extern size_t pg_wchar_strlen(const pg_wchar *wstr);
00295 extern int      pg_mblen(const char *mbstr);
00296 extern int      pg_dsplen(const char *mbstr);
00297 extern int      pg_encoding_mblen(int encoding, const char *mbstr);
00298 extern int      pg_encoding_dsplen(int encoding, const char *mbstr);
00299 extern int      pg_encoding_verifymb(int encoding, const char *mbstr, int len);
00300 extern int      pg_mule_mblen(const unsigned char *mbstr);
00301 extern int      pg_mic_mblen(const unsigned char *mbstr);
00302 extern int      pg_mbstrlen(const char *mbstr);
00303 extern int      pg_mbstrlen_with_len(const char *mbstr, int len);
00304 extern int      pg_mbcliplen(const char *mbstr, int len, int limit);
00305 extern int      pg_mbcharcliplen(const char *mbstr, int len, int imit);
00306 extern int      pg_encoding_max_length(int encoding);
00307 extern int      pg_database_encoding_max_length(void);
00308 
/*
 * Client and database encoding state.
 *
 * Declarations only; the definitions are not part of this header.
 */

/* client encoding */
extern void SetDefaultClientEncoding(void);
extern int SetClientEncoding(int encoding, bool doit);
extern void InitializeClientEncoding(void);
extern int pg_get_client_encoding(void);
extern const char *pg_get_client_encoding_name(void);

/* database encoding */
extern void SetDatabaseEncoding(int encoding);
extern int GetDatabaseEncoding(void);
extern const char *GetDatabaseEncodingName(void);

/* checks on an encoding given by name */
extern int pg_valid_client_encoding(const char *name);
extern int pg_valid_server_encoding(const char *name);
00321 
00322 extern int      pg_utf_mblen(const unsigned char *);
00323 extern unsigned char *pg_do_encoding_conversion(unsigned char *src, int len,
00324                                                   int src_encoding,
00325                                                   int dest_encoding);
00326 
00327 extern char *pg_client_to_server(const char *s, int len);
00328 extern char *pg_server_to_client(const char *s, int len);
00329 
00330 extern unsigned short BIG5toCNS(unsigned short big5, unsigned char *lc);
00331 extern unsigned short CNStoBIG5(unsigned short cns, unsigned char lc);
00332 
00333 extern void LocalToUtf(const unsigned char *iso, unsigned char *utf,
00334                    const pg_local_to_utf *map, int size, int encoding, int len);
00335 
00336 extern void UtfToLocal(const unsigned char *utf, unsigned char *iso,
00337                    const pg_utf_to_local *map, int size, int encoding, int len);
00338 
/*
 * Verification, error reporting and MULE internal code (MIC) conversion.
 *
 * Declarations only; the definitions are not part of this header.
 */

/* multibyte string verification */
extern bool pg_verifymbstr(const char *mbstr, int len, bool noError);
extern bool pg_verify_mbstr(int encoding, const char *mbstr, int len,
                            bool noError);

/* error reporting */
extern void report_invalid_encoding(int encoding, const char *mbstr, int len);
extern void report_untranslatable_char(int src_encoding, int dest_encoding,
                                       const char *mbstr, int len);

/* ASCII / Latin <-> MIC */
extern void pg_ascii2mic(const unsigned char *l, unsigned char *p, int len);
extern void pg_mic2ascii(const unsigned char *mic, unsigned char *p, int len);
extern void latin2mic(const unsigned char *l, unsigned char *p, int len,
                      int lc, int encoding);
extern void mic2latin(const unsigned char *mic, unsigned char *p, int len,
                      int lc, int encoding);
extern void latin2mic_with_table(const unsigned char *l, unsigned char *p,
                                 int len, int lc, int encoding,
                                 const unsigned char *tab);
extern void mic2latin_with_table(const unsigned char *mic, unsigned char *p,
                                 int len, int lc, int encoding,
                                 const unsigned char *tab);

extern bool pg_utf8_islegal(const unsigned char *source, int length);
00361 
00362 #endif   /* PG_WCHAR_H */
 Todo Clases Namespaces Archivos Funciones Variables 'typedefs' Enumeraciones Valores de enumeraciones Propiedades Amigas 'defines'