diff options
author | unknown <bar@bar.mysql.r18.ru> | 2003-01-03 14:35:32 +0400 |
---|---|---|
committer | unknown <bar@bar.mysql.r18.ru> | 2003-01-03 14:35:32 +0400 |
commit | 389cec7d99e85b3dbf3827ca70e8e79f0898b4db (patch) | |
tree | e71d0eded8f98db0c52b040703559d19629d5f86 | |
parent | ac0bb91fb12afda09902d08333ffb6522fabcd14 (diff) | |
download | mariadb-git-389cec7d99e85b3dbf3827ca70e8e79f0898b4db.tar.gz |
Charset index is stored in XML now
-rw-r--r-- | libmysql/Makefile.shared | 2 | ||||
-rw-r--r-- | mysys/charset.c | 193 | ||||
-rw-r--r-- | sql/share/Makefile.am | 4 | ||||
-rw-r--r-- | sql/share/charsets/Index.xml | 455 |
4 files changed, 612 insertions, 42 deletions
diff --git a/libmysql/Makefile.shared b/libmysql/Makefile.shared index a2e6fddff0f..4d8b703fb2d 100644 --- a/libmysql/Makefile.shared +++ b/libmysql/Makefile.shared @@ -58,7 +58,7 @@ mysysobjects1 = my_init.lo my_static.lo my_malloc.lo my_realloc.lo \ mf_loadpath.lo my_pthread.lo my_thr_init.lo \ thr_mutex.lo mulalloc.lo string.lo default.lo \ my_compress.lo array.lo my_once.lo list.lo my_net.lo \ - charset.lo hash.lo mf_iocache.lo \ + charset.lo xml.lo hash.lo mf_iocache.lo \ mf_iocache2.lo my_seek.lo \ my_pread.lo mf_cache.lo my_vsnprintf.lo md5.lo sha1.lo\ my_getopt.lo my_gethostbyname.lo my_port.lo diff --git a/mysys/charset.c b/mysys/charset.c index cf0628495fc..3f650bbe14d 100644 --- a/mysys/charset.c +++ b/mysys/charset.c @@ -19,7 +19,9 @@ #include <m_ctype.h> #include <m_string.h> #include <my_dir.h> +#include <my_xml.h> +#define MY_CHARSET_INDEX "Index.xml" const char *charsets_dir = NULL; static int charset_initialized=0; @@ -85,53 +87,166 @@ char *get_charsets_dir(char *buf) } -static my_bool read_charset_index(myf myflags) +#define MAX_BUF 1024*16 + + +static void mstr(char *str,const char *src,uint l1,uint l2) { - struct simpleconfig_buf_st fb; - char buf[MAX_LINE], num_buf[MAX_LINE]; - - strmov(get_charsets_dir(buf), "Index"); + l1 = l1<l2 ? 
l1 : l2; + memcpy(str,src,l1); + str[l1]='\0'; +} - if ((fb.f = my_fopen(buf, O_RDONLY, myflags)) == NULL) - return TRUE; - fb.buf[0] = '\0'; - fb.p = fb.buf; +struct my_cs_file_section_st +{ + int state; + const char *str; +}; + +#define _CS_MISC 1 +#define _CS_ID 2 +#define _CS_NAME 3 +#define _CS_FAMILY 4 +#define _CS_ORDER 5 +#define _CS_COLNAME 6 +#define _CS_FLAG 7 +#define _CS_CHARSET 8 +#define _CS_COLLATION 9 + +static struct my_cs_file_section_st sec[] = +{ + {_CS_MISC, "xml"}, + {_CS_MISC, "xml.version"}, + {_CS_MISC, "xml.encoding"}, + {_CS_MISC, "charsets"}, + {_CS_MISC, "charsets.max-id"}, + {_CS_MISC, "charsets.description"}, + {_CS_CHARSET, "charsets.charset"}, + {_CS_NAME, "charsets.charset.name"}, + {_CS_FAMILY, "charsets.charset.family"}, + {_CS_MISC, "charsets.charset.alias"}, + {_CS_COLLATION, "charsets.charset.collation"}, + {_CS_COLNAME, "charsets.charset.collation.name"}, + {_CS_ID, "charsets.charset.collation.id"}, + {_CS_ORDER, "charsets.charset.collation.order"}, + {_CS_FLAG, "charsets.charset.collation.flag"}, + {0, NULL} +}; + +static struct my_cs_file_section_st * cs_file_sec(const char *attr, uint len) +{ + struct my_cs_file_section_st *s; + for (s=sec; s->str; s++) + if (!strncmp(attr,s->str,len)) + return s; + return NULL; +} + +struct my_cs_file_info +{ + CHARSET_INFO cs; + myf myflags; +}; + +static int cs_enter(MY_XML_PARSER *st,const char *attr, uint len) +{ + struct my_cs_file_info *i = (struct my_cs_file_info *)st->user_data; + struct my_cs_file_section_st *s = cs_file_sec(attr,len); - while (!get_word(&fb, buf) && !get_word(&fb, num_buf)) + if ( s && (s->state == _CS_CHARSET)) { - uint csnum; - uint length; - CHARSET_INFO *cs; + bzero(&i->cs,sizeof(i->cs)); + } + return MY_XML_OK; +} - if (!(csnum = atoi(num_buf))) - { - /* corrupt Index file */ - my_fclose(fb.f,myflags); - return TRUE; - } - - if (all_charsets[csnum]) - continue; - - if (!(cs=(CHARSET_INFO*) my_once_alloc(sizeof(cs[0]),myflags))) - { - 
my_fclose(fb.f,myflags); - return TRUE; - } - bzero(cs,sizeof(cs[0])); - - if (!(cs->name= (char*)my_once_alloc(length=(uint)strlen(buf)+1,myflags))) +static int cs_leave(MY_XML_PARSER *st,const char *attr, uint len) +{ + struct my_cs_file_info *i = (struct my_cs_file_info *)st->user_data; + struct my_cs_file_section_st *s = cs_file_sec(attr,len); + + if (s && (s->state == _CS_COLLATION) && !all_charsets[i->cs.number]) + { + if (!(all_charsets[i->cs.number]= + (CHARSET_INFO*) my_once_alloc(sizeof(CHARSET_INFO),i->myflags))) { - my_fclose(fb.f,myflags); - return TRUE; + return MY_XML_ERROR; } - memcpy((char*)cs->name,buf,length); - cs->number=csnum; - all_charsets[csnum]=cs; + all_charsets[i->cs.number][0]=i->cs; + } + return MY_XML_OK; +} + +static int cs_value(MY_XML_PARSER *st,const char *attr, uint len) +{ + struct my_cs_file_info *i = (struct my_cs_file_info *)st->user_data; + struct my_cs_file_section_st *s; + int state = (s=cs_file_sec(st->attr,strlen(st->attr))) ? s->state : 0; + + if(0) + { + char str[256]; + mstr(str,attr,len,sizeof(str)-1); + printf("VALUE %d %s='%s'\n",state,st->attr,str); + } + + switch (state) + { + case _CS_ID: + i->cs.number = my_strntoul(my_charset_latin1,attr,len,(char**)NULL,0); + break; + case _CS_COLNAME: + if ((i->cs.name = (char*) my_once_alloc(len+1,i->myflags))) + { + memcpy((char*)i->cs.name,attr,len); + ((char*)(i->cs.name))[len]='\0'; + } + break; } - my_fclose(fb.f,myflags); + return MY_XML_OK; +} +static my_bool read_charset_index(myf myflags) +{ + char *buf; + int fd; + uint len; + MY_XML_PARSER p; + struct my_cs_file_info i; + + if (! 
(buf = (char *)my_malloc(MAX_BUF,myflags))) + return FALSE; + + strmov(get_charsets_dir(buf),MY_CHARSET_INDEX); + + if ((fd=my_open(buf,O_RDONLY,myflags)) < 0) + { + my_free(buf,myflags); + return TRUE; + } + + len=read(fd,buf,MAX_BUF); + my_xml_parser_create(&p); + my_close(fd,myflags); + + my_xml_set_enter_handler(&p,cs_enter); + my_xml_set_value_handler(&p,cs_value); + my_xml_set_leave_handler(&p,cs_leave); + my_xml_set_user_data(&p,(void*)&i); + + if (MY_XML_OK!=my_xml_parse(&p,buf,len)) + { + /* + printf("ERROR at line %d pos %d '%s'\n", + my_xml_error_lineno(&p)+1, + my_xml_error_pos(&p), + my_xml_error_string(&p)); + */ + } + + my_xml_parser_free(&p); + return FALSE; } @@ -472,7 +587,7 @@ CHARSET_INFO *get_charset(uint cs_number, myf flags) if (!cs && (flags & MY_WME)) { char index_file[FN_REFLEN], cs_string[23]; - strmov(get_charsets_dir(index_file), "Index"); + strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX); cs_string[0]='#'; int10_to_str(cs_number, cs_string+1, 10); my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_string, index_file); @@ -505,7 +620,7 @@ CHARSET_INFO *get_charset_by_name(const char *cs_name, myf flags) if (!cs && (flags & MY_WME)) { char index_file[FN_REFLEN]; - strmov(get_charsets_dir(index_file), "Index"); + strmov(get_charsets_dir(index_file),MY_CHARSET_INDEX); my_error(EE_UNKNOWN_CHARSET, MYF(ME_BELL), cs_name, index_file); } diff --git a/sql/share/Makefile.am b/sql/share/Makefile.am index c70ac9ccf57..a1b506f1ff5 100644 --- a/sql/share/Makefile.am +++ b/sql/share/Makefile.am @@ -7,7 +7,7 @@ dist-hook: done; \ sleep 1 ; touch $(srcdir)/*/errmsg.sys $(INSTALL_DATA) $(srcdir)/charsets/README $(distdir)/charsets - $(INSTALL_DATA) $(srcdir)/charsets/Index $(distdir)/charsets + $(INSTALL_DATA) $(srcdir)/charsets/Index.xml $(distdir)/charsets all: @AVAILABLE_LANGUAGES_ERRORS@ @@ -25,7 +25,7 @@ install-data-local: done $(mkinstalldirs) $(DESTDIR)$(pkgdatadir)/charsets $(INSTALL_DATA) $(srcdir)/charsets/README 
$(DESTDIR)$(pkgdatadir)/charsets/README - $(INSTALL_DATA) $(srcdir)/charsets/Index $(DESTDIR)$(pkgdatadir)/charsets/Index + $(INSTALL_DATA) $(srcdir)/charsets/Index.xml $(DESTDIR)$(pkgdatadir)/charsets/Index.xml $(INSTALL_DATA) $(srcdir)/charsets/*.conf $(DESTDIR)$(pkgdatadir)/charsets fix_errors: diff --git a/sql/share/charsets/Index.xml b/sql/share/charsets/Index.xml new file mode 100644 index 00000000000..79b622f9c5c --- /dev/null +++ b/sql/share/charsets/Index.xml @@ -0,0 +1,455 @@ +<?xml version='1.0' encoding="utf-8"?> + +<charsets max-id=63> + +<description> +This file lists all of the available character sets. +To make maintaining easier please: + - keep records sorted by collation number. + - change charset-list.max-id when adding a new collation. +</description> + +<charset name="big5"> + <family>Traditional Chinese</family> + <alias>big-5</alias> + <alias>bigfive</alias> + <alias>big-five</alias> + <alias>cn-big5</alias> + <alias>csbig5</alias> + <collation name="big5" id="1" order="Chinese" flag="primary"/> +</charset> + +<charset name="latin2"> + <family>Central European</family> + <alias>csisolatin2</alias> + <alias>iso-8859-2</alias> + <alias>iso-ir-101</alias> + <alias>iso_8859-2</alias> + <alias>iso_8859-2:1987</alias> + <alias>l2</alias> + <collation name="czech" id="2" order="Czech"/> + <collation name="latin2" id="9" flag="primary"> + <order>Hungarian</order> + <order>Polish</order> + <order>Romanian</order> + <order>Croatian</order> + <order>Slovak</order> + <order>Slovenian</order> + <order>Sorbian</order> + </collation> + <collation name="hungarian" id="21" order="Hungarian"/> + <collation name="croat" id="27" order="Croatian"/> +</charset> + +<charset name="dec8"> + <family>Western</family> + <collation name="dec8" id="3" flag="primary"> + <order>Dutch</order> + <order>English</order> + <order>French</order> + <order>German Duden</order> + <order>Italian</order> + <order>Latin</order> + <order>Pogtuguese</order> + <order>Spanish</order> + 
</collation> +</charset> + +<charset name="pclatin1"> + <family>Western</family> + <alias>850</alias> + <alias>cp850</alias> + <alias>cspc850multilingual</alias> + <alias>ibm850</alias> + <collation name="dos" id="4" flag="primary"> + <order>Dutch</order> + <order>English</order> + <order>French</order> + <order>German Duden</order> + <order>Italian</order> + <order>Latin</order> + <order>Pogtuguese</order> + <order>Spanish</order> + </collation> +</charset> + +<charset name="latin1"> + <family>Western</family> + <alias>csisolatin1</alias> + <alias>csisolatin1</alias> + <alias>iso-8859-1</alias> + <alias>iso-ir-100</alias> + <alias>iso_8859-1</alias> + <alias>iso_8859-1:1987</alias> + <alias>l1</alias> + <alias>latin1</alias> + <collation name="german1" id="5" order="German Duden"/> + <collation name="latin1" id="8" order="Finnish, Swedish" flag="primary"/> + <collation name="danish" id="15" order="Danish"/> + <collation name="latin1_de" id="31" order="German DIN"/> + <collation name="latin1_bin" id="47" order="Binary"/> + <collation name="latin1_ci_as" id="48"> + <order>Dutch</order> + <order>English</order> + <order>French</order> + <order>German Duden</order> + <order>Italian</order> + <order>Latin</order> + <order>Pogtuguese</order> + <order>Spanish</order> + </collation> + <collation name="latin1_cs_as" id="49"> + <order>Dutch</order> + <order>English</order> + <order>French</order> + <order>German Duden</order> + <order>Italian</order> + <order>Latin</order> + <order>Pogtuguese</order> + <order>Spanish</order> + </collation> +</charset> + +<charset name="hp8"> + <family>Western</family> + <alias>hproman8</alias> + <collation name="hp8" id="6" flag="primary"> + <order>Dutch</order> + <order>English</order> + <order>French</order> + <order>German Duden</order> + <order>Italian</order> + <order>Latin</order> + <order>Pogtuguese</order> + <order>Spanish</order> + </collation> +</charset> + +<charset name="koi8_ru"> + <family>Cyrillic</family> + 
<alias>koi8-ru</alias> + <alias>cskoi8r</alias> + <collation name="koi8_ru" id="7" order="Russian" flag="primary"/> +</charset> + +<charset name="swe7"> + <family>Western</family> + <alias>iso-646-se</alias> + <collation name="swe7" id="10" order="Swedish" flag="primary"/> +</charset> + +<charset name="ascii"> + <family>Western</family> + <alias>us</alias> + <alias>us-ascii</alias> + <alias>csascii</alias> + <alias>iso-ir-6</alias> + <alias>iso646-us</alias> + <collation name="usa7" id="11" order="Egnlish" flag="primary"/> +</charset> + +<charset name="ujis"> + <family>Japanese</family> + <alias>euc-jp</alias> + <collation name="ujis" id="12" order="Japanese" flag="primary"/> +</charset> + +<charset name="sjis"> + <family>Japanese</family> + <alias>s-jis</alias> + <alias>shift-jis</alias> + <alias>x-sjis</alias> + <collation name="sjis" id="13" order="Japanese" flag="primary"/> +</charset> + +<charset name="cp1251"> + <family>Cyrillic</family> + <alias>windows-1251</alias> + <alias>ms-cyr</alias> + <alias>ms-cyrillic</alias> + <collation name="cp1251" id="14" flag="primary"> + <order>Belarusian</order> + <order>Bulgarian</order> + <order>Macedonian</order> + <order>Russian</order> + <order>Serbian</order> + <order>Mongolian</order> + <order>Ukrainian</order> + </collation> + <collation name="win1251ukr" id="23" order="<Depreciated>"/> + <collation name="cp1251_bin" id="50" order="Binary"/> + <collation name="cp1251_ci_as" id="51"> + <order>Belarusian</order> + <order>Bulgarian</order> + <order>Macedonian</order> + <order>Russian</order> + <order>Serbian</order> + <order>Mongolian</order> + <order>Ukrainian</order> + </collation> + <collation name="cp1251_cs_as" id="52"> + <order>Belarusian</order> + <order>Bulgarian</order> + <order>Macedonian</order> + <order>Russian</order> + <order>Serbian</order> + <order>Mongolian</order> + <order>Ukrainian</order> + </collation> +</charset> + +<charset name="hebrew"> + <family>Hebrew</family> + <alias>csisolatinhebrew</alias> 
+ <alias>iso-8859-8</alias> + <alias>iso-ir-138</alias> + <collation name="hebrew" id="16" order="Hebrew" flag="primary"/> +</charset> + +<charset name="tis620"> + <family>Thai</family> + <alias>tis-620</alias> + <collation name="tis620" id="18" order="Thai" flag="primary"/> +</charset> + +<charset name="euc_kr"> + <family>Korean</family> + <alias>euckr</alias> + <alias>euc-kr</alias> + <collation name="euc_kr" id="19" order="Korean" flag="primary"/> +</charset> + +<charset name="latin7"> + <family>Baltic</family> + <alias>BalticRim</alias> + <alias>iso-8859-13</alias> + <alias>l7</alias> + <collation name="estonia" id="20" order="Estonian" flag="primary"/> + <collation name="latvian" id="41" order="Latvian"/> + <collation name="latvian1" id="42" order="Latvian"/> +</charset> + +<charset name="koi8_ukr"> + <family>Cyrillic</family> + <alias>koi8-u</alias> + <collation name="koi8_ukr" id="22" order="Ukranian" flag="primary"/> +</charset> + +<charset name="gb2312"> + <family>Simplified Chinese</family> + <alias>chinese</alias> + <alias>iso-ir-58</alias> + <collation name="gb2312" id="24" order="Chinese" flag="primary"/> +</charset> + +<charset name="greek"> + <family>Greek</family> + <alias>csisolatingreek</alias> + <alias>ecma-118</alias> + <alias>greek8</alias> + <alias>iso-8859-7</alias> + <alias>iso-ir-126</alias> + <collation name="greek" id="25" order="Greek" flag="primary"/> +</charset> + +<charset name="cp1250"> + <family>Central European</family> + <alias>ms-ce</alias> + <alias>windows-1250</alias> + <collation name="win1250" id="26" flag="primary"> + <order>Hungarian</order> + <order>Polish</order> + <order>Romanian</order> + <order>Croatian</order> + <order>Slovak</order> + <order>Slovenian</order> + <order>Sorbian</order> + </collation> + <collation name="win1250ch" id="34" order="Czech"/> +</charset> + +<charset name="gbk"> + <family>East Asian</family> + <alias>cp936</alias> + <collation name="gbk" id="28" order="Chinese" flag="primary"/> +</charset> + 
+<charset name="cp1257"> + <family>Baltic</family> + <alias>WinBaltRim</alias> + <alias>windows-1257</alias> + <collation name="cp1257" id="29" order="<Depreciated>"/> + <collation name="cp1257_bin" id="58" order="Binary"/> + <collation name="cp1257_ci_ai" id="59" flag="primary"> + <order>Latvian</order> + <order>Lithuanian</order> + </collation> + <collation name="cp1257_ci_as" id="60"> + <order>Latvian</order> + <order>Lithuanian</order> + </collation> + <collation name="cp1257_cs_as" id="61"> + <order>Latvian</order> + <order>Lithuanian</order> + </collation> +</charset> + +<charset name="latin5"> + <family>South Asian</family> + <alias>csisolatin5</alias> + <alias>iso-8859-9</alias> + <alias>iso-ir-148</alias> + <alias>l5</alias> + <alias>latin5</alias> + <alias>turkish</alias> + <collation name="latin5" id="30" order="Turkish" flag="primary"/> +</charset> + +<charset name="armscii8"> + <family>South Asian</family> + <alias>armscii-8</alias> + <collation name="armscii8" id="32" order="Armenian" flag="primary"/> +</charset> + +<charset name="utf8"> + <family>Unicode</family> + <alias>utf-8</alias> + <collation name="utf8" id="33" flag="primary"/> +</charset> + +<charset name="ucs2"> + <family>Unicode</family> + <collation name="ucs2" id="35" flag="primary"/> +</charset> + +<charset name="cp866"> + <family>Cyrillic</family> + <alias>866</alias> + <alias>csibm866</alias> + <alias>ibm866</alias> + <collation name="cp866" id="36" order="Russian" flag="primary"/> +</charset> + +<charset name="keybcs2"> + <family>Central European</family> + <collation name="keybcs2" id="37" order="Czech" flag="primary"/> +</charset> + +<charset name="MacCE"> + <family>Central European</family> + <alias>MacCentralEurope</alias> + <collation name="macce" id="38" flag="primary"> + <order>Hungarian</order> + <order>Polish</order> + <order>Romanian</order> + <order>Croatian</order> + <order>Slovak</order> + <order>Slovenian</order> + <order>Sorbian</order> + </collation> + <collation 
name="macce_bin" id="43" order="Binary"/> + <collation name="macce_ci_ai" id="44"> + <order>Hungarian</order> + <order>Polish</order> + <order>Romanian</order> + <order>Croatian</order> + <order>Slovak</order> + <order>Slovenian</order> + <order>Sorbian</order> + </collation> + <collation name="macce_ci_as" id="45"> + <order>Hungarian</order> + <order>Polish</order> + <order>Romanian</order> + <order>Croatian</order> + <order>Slovak</order> + <order>Slovenian</order> + <order>Sorbian</order> + </collation> + <collation name="macce_cs_as" id="46"> + <order>Hungarian</order> + <order>Polish</order> + <order>Romanian</order> + <order>Croatian</order> + <order>Slovak</order> + <order>Slovenian</order> + <order>Sorbian</order> + </collation> +</charset> + +<charset name="MacRoman"> + <family>Western</family> + <alias>Mac</alias> + <alias>Macintosh</alias> + <alias>csmacintosh</alias> + <collation name="macroman" id="39" flag="primary"> + <order>Dutch</order> + <order>English</order> + <order>French</order> + <order>German Duden</order> + <order>Italian</order> + <order>Latin</order> + <order>Pogtuguese</order> + <order>Spanish</order> + </collation> + <collation name="macroman_bin" id="53" order="Binary"/> + <collation name="macroman_ci_as" id="54"> + <order>Dutch</order> + <order>English</order> + <order>French</order> + <order>German Duden</order> + <order>Italian</order> + <order>Latin</order> + <order>Pogtuguese</order> + <order>Spanish</order> + </collation> + <collation name="macroman_ci_ai" id="55"> + <order>Dutch</order> + <order>English</order> + <order>French</order> + <order>German Duden</order> + <order>Italian</order> + <order>Latin</order> + <order>Pogtuguese</order> + <order>Spanish</order> + </collation> + <collation name="macroman_cs_as" id="56"> + <order>Dutch</order> + <order>English</order> + <order>French</order> + <order>German Duden</order> + <order>Italian</order> + <order>Latin</order> + <order>Pogtuguese</order> + <order>Spanish</order> + 
</collation> +</charset> + +<charset name="pclatin2"> + <family>Central European</family> + <alias>852</alias> + <alias>cp852</alias> + <alias>ibm852</alias> + <collation name="pclatin2" id="40" flag="primary"> + <order>Hungarian</order> + <order>Polish</order> + <order>Romanian</order> + <order>Croatian</order> + <order>Slovak</order> + <order>Slovenian</order> + <order>Sorbian</order> + </collation> +</charset> + +<charset name="cp1256"> + <family>Arabic</family> + <alias>ms-arab</alias> + <alias>windows-1256</alias> + <collation name="cp1256" id="57" order="Arabic" flag="primary"/> +</charset> + +<charset name="binary"> + <collation name="binary" id="63" order="Binary" flag="primary"/> +</charset> + +</charsets> |