diff options
author | Tom Lane <tgl@sss.pgh.pa.us> | 2006-07-28 15:33:28 +0000 |
---|---|---|
committer | Tom Lane <tgl@sss.pgh.pa.us> | 2006-07-28 15:33:28 +0000 |
commit | 87953b3d6085a8cf666c321a458c5e5536787a0d (patch) | |
tree | 0f5037baaf6880db26ebeb52d34f8e04ab715253 | |
parent | 6779710f261efb644cffc6f840670dab82314c31 (diff) | |
download | postgresql-87953b3d6085a8cf666c321a458c5e5536787a0d.tar.gz |
Make it clearer that not every Postgres character set can be used as a
server-side character set.
-rw-r--r-- | doc/src/sgml/charset.sgml | 73 |
1 files changed, 53 insertions, 20 deletions
diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml index aee43acfd1..bdf64bf7a6 100644 --- a/doc/src/sgml/charset.sgml +++ b/doc/src/sgml/charset.sgml @@ -1,4 +1,4 @@ -<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.75 2005/11/04 23:13:59 petere Exp $ --> +<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.75.2.1 2006/07/28 15:33:28 tgl Exp $ --> <chapter id="charset"> <title>Localization</> @@ -304,14 +304,13 @@ initdb --locale=sv_SE allows you to store text in a variety of character sets, including single-byte character sets such as the ISO 8859 series and multiple-byte character sets such as <acronym>EUC</> (Extended Unix - Code), UTF-8, and Mule internal code. All character sets can be - used transparently throughout the server. (If you use extension - functions from other sources, it depends on whether they wrote - their code correctly.) The default character set is selected while + Code), UTF-8, and Mule internal code. All supported character sets + can be used transparently by clients, but a few are not supported + for use within the server (that is, as a server-side encoding). + The default character set is selected while initializing your <productname>PostgreSQL</productname> database cluster using <command>initdb</>. It can be overridden when you - create a database using <command>createdb</command> or by using the - SQL command <command>CREATE DATABASE</>. So you can have multiple + create a database, so you can have multiple databases each with a different character set. </para> @@ -320,17 +319,18 @@ initdb --locale=sv_SE <para> <xref linkend="charset-table"> shows the character sets available - for use in the server. + for use in <productname>PostgreSQL</productname>. </para> <table id="charset-table"> <title>Server Character Sets</title> - <tgroup cols="2"> + <tgroup cols="6"> <thead> <row> <entry>Name</entry> <entry>Description</entry> <entry>Language</entry> + <entry>Server?</entry> <!-- The Bytes/Char field is populated by looking at the values returned by pg_wchar_table.mblen function for each encoding. @@ -344,6 +344,7 @@ initdb --locale=sv_SE <entry><literal>BIG5</literal></entry> <entry>Big Five</entry> <entry>Traditional Chinese</entry> + <entry>No</entry> <entry>1-2</entry> <entry><literal>WIN950</>, <literal>Windows950</></entry> </row> @@ -351,6 +352,7 @@ initdb --locale=sv_SE <entry><literal>EUC_CN</literal></entry> <entry>Extended UNIX Code-CN</entry> <entry>Simplified Chinese</entry> + <entry>Yes</entry> <entry>1-3</entry> <entry></entry> </row> @@ -358,6 +360,7 @@ initdb --locale=sv_SE <entry><literal>EUC_JP</literal></entry> <entry>Extended UNIX Code-JP</entry> <entry>Japanese</entry> + <entry>Yes</entry> <entry>1-3</entry> <entry></entry> </row> @@ -365,6 +368,7 @@ initdb --locale=sv_SE <entry><literal>EUC_KR</literal></entry> <entry>Extended UNIX Code-KR</entry> <entry>Korean</entry> + <entry>Yes</entry> <entry>1-3</entry> <entry></entry> </row> @@ -372,6 +376,7 @@ initdb --locale=sv_SE <entry><literal>EUC_TW</literal></entry> <entry>Extended UNIX Code-TW</entry> <entry>Traditional Chinese, Taiwanese</entry> + <entry>Yes</entry> <entry>1-3</entry> <entry></entry> </row> @@ -379,6 +384,7 @@ initdb --locale=sv_SE <entry><literal>GB18030</literal></entry> <entry>National Standard</entry> <entry>Chinese</entry> + <entry>No</entry> <entry>1-2</entry> <entry></entry> </row> @@ -386,6 +392,7 @@ initdb --locale=sv_SE <entry><literal>GBK</literal></entry> <entry>Extended National Standard</entry> <entry>Simplified Chinese</entry> + <entry>No</entry> <entry>1-2</entry> <entry><literal>WIN936</>, <literal>Windows936</></entry> </row> @@ -393,6 +400,7 @@ initdb --locale=sv_SE <entry><literal>ISO_8859_5</literal></entry> <entry>ISO 8859-5, <acronym>ECMA</> 113</entry> <entry>Latin/Cyrillic</entry> + <entry>Yes</entry> <entry>1</entry> <entry></entry> </row> @@ -400,6 +408,7 @@ initdb --locale=sv_SE <entry><literal>ISO_8859_6</literal></entry> <entry>ISO 8859-6, <acronym>ECMA</> 114</entry> <entry>Latin/Arabic</entry> + <entry>Yes</entry> <entry>1</entry> <entry></entry> </row> @@ -407,6 +416,7 @@ initdb --locale=sv_SE <entry><literal>ISO_8859_7</literal></entry> <entry>ISO 8859-7, <acronym>ECMA</> 118</entry> <entry>Latin/Greek</entry> + <entry>Yes</entry> <entry>1</entry> <entry></entry> </row> @@ -414,6 +424,7 @@ initdb --locale=sv_SE <entry><literal>ISO_8859_8</literal></entry> <entry>ISO 8859-8, <acronym>ECMA</> 121</entry> <entry>Latin/Hebrew</entry> + <entry>Yes</entry> <entry>1</entry> <entry></entry> </row> @@ -421,6 +432,7 @@ initdb --locale=sv_SE <entry><literal>JOHAB</literal></entry> <entry><acronym>JOHAB</></entry> <entry>Korean (Hangul)</entry> + <entry>Yes</entry> <entry>1-3</entry> <entry></entry> </row> @@ -428,6 +440,7 @@ initdb --locale=sv_SE <entry><literal>KOI8</literal></entry> <entry><acronym>KOI</acronym>8-R(U)</entry> <entry>Cyrillic</entry> + <entry>Yes</entry> <entry>1</entry> <entry><literal>KOI8R</></entry> </row> @@ -435,6 +448,7 @@ initdb --locale=sv_SE <entry><literal>LATIN1</literal></entry> <entry>ISO 8859-1, <acronym>ECMA</> 94</entry> <entry>Western European</entry> + <entry>Yes</entry> <entry>1</entry> <entry><literal>ISO88591</></entry> </row> @@ -442,6 +456,7 @@ initdb --locale=sv_SE <entry><literal>LATIN2</literal></entry> <entry>ISO 8859-2, <acronym>ECMA</> 94</entry> <entry>Central European</entry> + <entry>Yes</entry> <entry>1</entry> <entry><literal>ISO88592</></entry> </row> @@ -449,6 +464,7 @@ initdb --locale=sv_SE <entry><literal>LATIN3</literal></entry> <entry>ISO 8859-3, <acronym>ECMA</> 94</entry> <entry>South European</entry> + <entry>Yes</entry> <entry>1</entry> <entry><literal>ISO88593</></entry> </row> @@ -456,6 +472,7 @@ initdb --locale=sv_SE <entry><literal>LATIN4</literal></entry> <entry>ISO 8859-4, <acronym>ECMA</> 94</entry> <entry>North European</entry> + <entry>Yes</entry> <entry>1</entry> <entry><literal>ISO88594</></entry> </row> @@ -463,6 +480,7 @@ initdb --locale=sv_SE <entry><literal>LATIN5</literal></entry> <entry>ISO 8859-9, <acronym>ECMA</> 128</entry> <entry>Turkish</entry> + <entry>Yes</entry> <entry>1</entry> <entry><literal>ISO88599</></entry> </row> @@ -470,6 +488,7 @@ initdb --locale=sv_SE <entry><literal>LATIN6</literal></entry> <entry>ISO 8859-10, <acronym>ECMA</> 144</entry> <entry>Nordic</entry> + <entry>Yes</entry> <entry>1</entry> <entry><literal>ISO885910</></entry> </row> @@ -477,6 +496,7 @@ initdb --locale=sv_SE <entry><literal>LATIN7</literal></entry> <entry>ISO 8859-13</entry> <entry>Baltic</entry> + <entry>Yes</entry> <entry>1</entry> <entry><literal>ISO885913</></entry> </row> @@ -484,6 +504,7 @@ initdb --locale=sv_SE <entry><literal>LATIN8</literal></entry> <entry>ISO 8859-14</entry> <entry>Celtic</entry> + <entry>Yes</entry> <entry>1</entry> <entry><literal>ISO885914</></entry> </row> @@ -491,6 +512,7 @@ initdb --locale=sv_SE <entry><literal>LATIN9</literal></entry> <entry>ISO 8859-15</entry> <entry>LATIN1 with Euro and accents</entry> + <entry>Yes</entry> <entry>1</entry> <entry>ISO885915</entry> </row> @@ -498,6 +520,7 @@ initdb --locale=sv_SE <entry><literal>LATIN10</literal></entry> <entry>ISO 8859-16, <acronym>ASRO</> SR 14111</entry> <entry>Romanian</entry> + <entry>Yes</entry> <entry>1</entry> <entry><literal>ISO885916</></entry> </row> @@ -505,6 +528,7 @@ initdb --locale=sv_SE <entry><literal>MULE_INTERNAL</literal></entry> <entry>Mule internal code</entry> <entry>Multilingual Emacs</entry> + <entry>Yes</entry> <entry>1-4</entry> <entry></entry> </row> @@ -512,6 +536,7 @@ initdb --locale=sv_SE <entry><literal>SJIS</literal></entry> <entry>Shift JIS</entry> <entry>Japanese</entry> + <entry>No</entry> <entry>1-2</entry> <entry><literal>Mskanji</>, <literal>ShiftJIS</>, <literal>WIN932</>, <literal>Windows932</></entry> </row> @@ -519,6 +544,7 @@ initdb --locale=sv_SE <entry><literal>SQL_ASCII</literal></entry> <entry>unspecified (see text)</entry> <entry><emphasis>any</></entry> + <entry>Yes</entry> <entry>1</entry> <entry></entry> </row> @@ -526,6 +552,7 @@ initdb --locale=sv_SE <entry><literal>UHC</literal></entry> <entry>Unified Hangul Code</entry> <entry>Korean</entry> + <entry>No</entry> <entry>1-2</entry> <entry><literal>WIN949</>, <literal>Windows949</></entry> </row> @@ -533,6 +560,7 @@ initdb --locale=sv_SE <entry><literal>UTF8</literal></entry> <entry>Unicode, 8-bit</entry> <entry><emphasis>all</></entry> + <entry>Yes</entry> <entry>1-4</entry> <entry><literal>Unicode</></entry> </row> @@ -540,6 +568,7 @@ initdb --locale=sv_SE <entry><literal>WIN866</literal></entry> <entry>Windows CP866</entry> <entry>Cyrillic</entry> + <entry>Yes</entry> <entry>1</entry> <entry><literal>ALT</></entry> </row> @@ -547,6 +576,7 @@ initdb --locale=sv_SE <entry><literal>WIN874</literal></entry> <entry>Windows CP874</entry> <entry>Thai</entry> + <entry>Yes</entry> <entry>1</entry> <entry></entry> </row> @@ -554,6 +584,7 @@ initdb --locale=sv_SE <entry><literal>WIN1250</literal></entry> <entry>Windows CP1250</entry> <entry>Central European</entry> + <entry>Yes</entry> <entry>1</entry> <entry></entry> </row> @@ -561,6 +592,7 @@ initdb --locale=sv_SE <entry><literal>WIN1251</literal></entry> <entry>Windows CP1251</entry> <entry>Cyrillic</entry> + <entry>Yes</entry> <entry>1</entry> <entry><literal>WIN</></entry> </row> @@ -568,6 +600,7 @@ initdb --locale=sv_SE <entry><literal>WIN1252</literal></entry> <entry>Windows CP1252</entry> <entry>Western European</entry> + <entry>Yes</entry> <entry>1</entry> <entry></entry> </row> @@ -575,6 +608,7 @@ initdb --locale=sv_SE <entry><literal>WIN1256</literal></entry> <entry>Windows CP1256</entry> <entry>Arabic</entry> + <entry>Yes</entry> <entry>1</entry> <entry></entry> </row> @@ -582,6 +616,7 @@ initdb --locale=sv_SE <entry><literal>WIN1258</literal></entry> <entry>Windows CP1258</entry> <entry>Vietnamese</entry> + <entry>Yes</entry> <entry>1</entry> <entry><literal>ABC</>, <literal>TCVN</>, <literal>TCVN5712</>, <literal>VSCII</></entry> </row> @@ -703,12 +738,11 @@ $ <userinput>psql -l</userinput> <para> <productname>PostgreSQL</productname> supports automatic character set conversion between server and client for certain - character sets. The conversion information is stored in the - <literal>pg_conversion</> system catalog. You can create a new - conversion by using the SQL command <command>CREATE - CONVERSION</command>. <productname>PostgreSQL</> comes with some - predefined conversions. They are listed in <xref - linkend="multibyte-translation-table">. + character set combinations. The conversion information is stored in the + <literal>pg_conversion</> system catalog. <productname>PostgreSQL</> + comes with some predefined conversions, as shown in <xref + linkend="multibyte-translation-table">. You can create a new + conversion using the SQL command <command>CREATE CONVERSION</command>. </para> <table id="multibyte-translation-table"> @@ -1029,7 +1063,8 @@ char *pg_encoding_to_char(int <replaceable>encoding_id</replaceable>); SET CLIENT_ENCODING TO '<replaceable>value</>'; </programlisting> - Also you can use the more standard SQL syntax <literal>SET NAMES</literal> for this purpose: + Also you can use the standard SQL syntax <literal>SET NAMES</literal> + for this purpose: <programlisting> SET NAMES '<replaceable>value</>'; @@ -1078,10 +1113,8 @@ RESET client_encoding; If the conversion of a particular character is not possible — suppose you chose <literal>EUC_JP</literal> for the server and <literal>LATIN1</literal> for the client, then some - Japanese characters cannot be converted to - <literal>LATIN1</literal> — it is transformed to its - hexadecimal byte values in parentheses, e.g., - <literal>(826C)</literal>. + Japanese characters do not have a representation in + <literal>LATIN1</literal> — then an error is reported. </para> <para> |