summary refs log tree commit diff
diff options
context:
space:
mode:
author Tom Lane <tgl@sss.pgh.pa.us> 2006-07-28 15:33:28 +0000
committer Tom Lane <tgl@sss.pgh.pa.us> 2006-07-28 15:33:28 +0000
commit 87953b3d6085a8cf666c321a458c5e5536787a0d (patch)
tree 0f5037baaf6880db26ebeb52d34f8e04ab715253
parent 6779710f261efb644cffc6f840670dab82314c31 (diff)
download postgresql-87953b3d6085a8cf666c321a458c5e5536787a0d.tar.gz
Make it clearer that not every Postgres character set can be used as a
server-side character set.
-rw-r--r-- doc/src/sgml/charset.sgml 73
1 file changed, 53 insertions, 20 deletions
diff --git a/doc/src/sgml/charset.sgml b/doc/src/sgml/charset.sgml
index aee43acfd1..bdf64bf7a6 100644
--- a/doc/src/sgml/charset.sgml
+++ b/doc/src/sgml/charset.sgml
@@ -1,4 +1,4 @@
-<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.75 2005/11/04 23:13:59 petere Exp $ -->
+<!-- $PostgreSQL: pgsql/doc/src/sgml/charset.sgml,v 2.75.2.1 2006/07/28 15:33:28 tgl Exp $ -->
<chapter id="charset">
<title>Localization</>
@@ -304,14 +304,13 @@ initdb --locale=sv_SE
allows you to store text in a variety of character sets, including
single-byte character sets such as the ISO 8859 series and
multiple-byte character sets such as <acronym>EUC</> (Extended Unix
- Code), UTF-8, and Mule internal code. All character sets can be
- used transparently throughout the server. (If you use extension
- functions from other sources, it depends on whether they wrote
- their code correctly.) The default character set is selected while
+ Code), UTF-8, and Mule internal code. All supported character sets
+ can be used transparently by clients, but a few are not supported
+ for use within the server (that is, as a server-side encoding).
+ The default character set is selected while
initializing your <productname>PostgreSQL</productname> database
cluster using <command>initdb</>. It can be overridden when you
- create a database using <command>createdb</command> or by using the
- SQL command <command>CREATE DATABASE</>. So you can have multiple
+ create a database, so you can have multiple
databases each with a different character set.
</para>
@@ -320,17 +319,18 @@ initdb --locale=sv_SE
<para>
<xref linkend="charset-table"> shows the character sets available
- for use in the server.
+ for use in <productname>PostgreSQL</productname>.
</para>
<table id="charset-table">
<title><productname>PostgreSQL</productname> Character Sets</title>
- <tgroup cols="2">
+ <tgroup cols="6">
<thead>
<row>
<entry>Name</entry>
<entry>Description</entry>
<entry>Language</entry>
+ <entry>Server?</entry>
<!--
The Bytes/Char field is populated by looking at the values returned
by pg_wchar_table.mblen function for each encoding.
@@ -344,6 +344,7 @@ initdb --locale=sv_SE
<entry><literal>BIG5</literal></entry>
<entry>Big Five</entry>
<entry>Traditional Chinese</entry>
+ <entry>No</entry>
<entry>1-2</entry>
<entry><literal>WIN950</>, <literal>Windows950</></entry>
</row>
@@ -351,6 +352,7 @@ initdb --locale=sv_SE
<entry><literal>EUC_CN</literal></entry>
<entry>Extended UNIX Code-CN</entry>
<entry>Simplified Chinese</entry>
+ <entry>Yes</entry>
<entry>1-3</entry>
<entry></entry>
</row>
@@ -358,6 +360,7 @@ initdb --locale=sv_SE
<entry><literal>EUC_JP</literal></entry>
<entry>Extended UNIX Code-JP</entry>
<entry>Japanese</entry>
+ <entry>Yes</entry>
<entry>1-3</entry>
<entry></entry>
</row>
@@ -365,6 +368,7 @@ initdb --locale=sv_SE
<entry><literal>EUC_KR</literal></entry>
<entry>Extended UNIX Code-KR</entry>
<entry>Korean</entry>
+ <entry>Yes</entry>
<entry>1-3</entry>
<entry></entry>
</row>
@@ -372,6 +376,7 @@ initdb --locale=sv_SE
<entry><literal>EUC_TW</literal></entry>
<entry>Extended UNIX Code-TW</entry>
<entry>Traditional Chinese, Taiwanese</entry>
+ <entry>Yes</entry>
<entry>1-3</entry>
<entry></entry>
</row>
@@ -379,6 +384,7 @@ initdb --locale=sv_SE
<entry><literal>GB18030</literal></entry>
<entry>National Standard</entry>
<entry>Chinese</entry>
+ <entry>No</entry>
<entry>1-2</entry>
<entry></entry>
</row>
@@ -386,6 +392,7 @@ initdb --locale=sv_SE
<entry><literal>GBK</literal></entry>
<entry>Extended National Standard</entry>
<entry>Simplified Chinese</entry>
+ <entry>No</entry>
<entry>1-2</entry>
<entry><literal>WIN936</>, <literal>Windows936</></entry>
</row>
@@ -393,6 +400,7 @@ initdb --locale=sv_SE
<entry><literal>ISO_8859_5</literal></entry>
<entry>ISO 8859-5, <acronym>ECMA</> 113</entry>
<entry>Latin/Cyrillic</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry></entry>
</row>
@@ -400,6 +408,7 @@ initdb --locale=sv_SE
<entry><literal>ISO_8859_6</literal></entry>
<entry>ISO 8859-6, <acronym>ECMA</> 114</entry>
<entry>Latin/Arabic</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry></entry>
</row>
@@ -407,6 +416,7 @@ initdb --locale=sv_SE
<entry><literal>ISO_8859_7</literal></entry>
<entry>ISO 8859-7, <acronym>ECMA</> 118</entry>
<entry>Latin/Greek</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry></entry>
</row>
@@ -414,6 +424,7 @@ initdb --locale=sv_SE
<entry><literal>ISO_8859_8</literal></entry>
<entry>ISO 8859-8, <acronym>ECMA</> 121</entry>
<entry>Latin/Hebrew</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry></entry>
</row>
@@ -421,6 +432,7 @@ initdb --locale=sv_SE
<entry><literal>JOHAB</literal></entry>
<entry><acronym>JOHAB</></entry>
<entry>Korean (Hangul)</entry>
+ <entry>Yes</entry>
<entry>1-3</entry>
<entry></entry>
</row>
@@ -428,6 +440,7 @@ initdb --locale=sv_SE
<entry><literal>KOI8</literal></entry>
<entry><acronym>KOI</acronym>8-R(U)</entry>
<entry>Cyrillic</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry><literal>KOI8R</></entry>
</row>
@@ -435,6 +448,7 @@ initdb --locale=sv_SE
<entry><literal>LATIN1</literal></entry>
<entry>ISO 8859-1, <acronym>ECMA</> 94</entry>
<entry>Western European</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry><literal>ISO88591</></entry>
</row>
@@ -442,6 +456,7 @@ initdb --locale=sv_SE
<entry><literal>LATIN2</literal></entry>
<entry>ISO 8859-2, <acronym>ECMA</> 94</entry>
<entry>Central European</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry><literal>ISO88592</></entry>
</row>
@@ -449,6 +464,7 @@ initdb --locale=sv_SE
<entry><literal>LATIN3</literal></entry>
<entry>ISO 8859-3, <acronym>ECMA</> 94</entry>
<entry>South European</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry><literal>ISO88593</></entry>
</row>
@@ -456,6 +472,7 @@ initdb --locale=sv_SE
<entry><literal>LATIN4</literal></entry>
<entry>ISO 8859-4, <acronym>ECMA</> 94</entry>
<entry>North European</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry><literal>ISO88594</></entry>
</row>
@@ -463,6 +480,7 @@ initdb --locale=sv_SE
<entry><literal>LATIN5</literal></entry>
<entry>ISO 8859-9, <acronym>ECMA</> 128</entry>
<entry>Turkish</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry><literal>ISO88599</></entry>
</row>
@@ -470,6 +488,7 @@ initdb --locale=sv_SE
<entry><literal>LATIN6</literal></entry>
<entry>ISO 8859-10, <acronym>ECMA</> 144</entry>
<entry>Nordic</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry><literal>ISO885910</></entry>
</row>
@@ -477,6 +496,7 @@ initdb --locale=sv_SE
<entry><literal>LATIN7</literal></entry>
<entry>ISO 8859-13</entry>
<entry>Baltic</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry><literal>ISO885913</></entry>
</row>
@@ -484,6 +504,7 @@ initdb --locale=sv_SE
<entry><literal>LATIN8</literal></entry>
<entry>ISO 8859-14</entry>
<entry>Celtic</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry><literal>ISO885914</></entry>
</row>
@@ -491,6 +512,7 @@ initdb --locale=sv_SE
<entry><literal>LATIN9</literal></entry>
<entry>ISO 8859-15</entry>
<entry>LATIN1 with Euro and accents</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry><literal>ISO885915</></entry>
</row>
@@ -498,6 +520,7 @@ initdb --locale=sv_SE
<entry><literal>LATIN10</literal></entry>
<entry>ISO 8859-16, <acronym>ASRO</> SR 14111</entry>
<entry>Romanian</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry><literal>ISO885916</></entry>
</row>
@@ -505,6 +528,7 @@ initdb --locale=sv_SE
<entry><literal>MULE_INTERNAL</literal></entry>
<entry>Mule internal code</entry>
<entry>Multilingual Emacs</entry>
+ <entry>Yes</entry>
<entry>1-4</entry>
<entry></entry>
</row>
@@ -512,6 +536,7 @@ initdb --locale=sv_SE
<entry><literal>SJIS</literal></entry>
<entry>Shift JIS</entry>
<entry>Japanese</entry>
+ <entry>No</entry>
<entry>1-2</entry>
<entry><literal>Mskanji</>, <literal>ShiftJIS</>, <literal>WIN932</>, <literal>Windows932</></entry>
</row>
@@ -519,6 +544,7 @@ initdb --locale=sv_SE
<entry><literal>SQL_ASCII</literal></entry>
<entry>unspecified (see text)</entry>
<entry><emphasis>any</></entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry></entry>
</row>
@@ -526,6 +552,7 @@ initdb --locale=sv_SE
<entry><literal>UHC</literal></entry>
<entry>Unified Hangul Code</entry>
<entry>Korean</entry>
+ <entry>No</entry>
<entry>1-2</entry>
<entry><literal>WIN949</>, <literal>Windows949</></entry>
</row>
@@ -533,6 +560,7 @@ initdb --locale=sv_SE
<entry><literal>UTF8</literal></entry>
<entry>Unicode, 8-bit</entry>
<entry><emphasis>all</></entry>
+ <entry>Yes</entry>
<entry>1-4</entry>
<entry><literal>Unicode</></entry>
</row>
@@ -540,6 +568,7 @@ initdb --locale=sv_SE
<entry><literal>WIN866</literal></entry>
<entry>Windows CP866</entry>
<entry>Cyrillic</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry><literal>ALT</></entry>
</row>
@@ -547,6 +576,7 @@ initdb --locale=sv_SE
<entry><literal>WIN874</literal></entry>
<entry>Windows CP874</entry>
<entry>Thai</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry></entry>
</row>
@@ -554,6 +584,7 @@ initdb --locale=sv_SE
<entry><literal>WIN1250</literal></entry>
<entry>Windows CP1250</entry>
<entry>Central European</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry></entry>
</row>
@@ -561,6 +592,7 @@ initdb --locale=sv_SE
<entry><literal>WIN1251</literal></entry>
<entry>Windows CP1251</entry>
<entry>Cyrillic</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry><literal>WIN</></entry>
</row>
@@ -568,6 +600,7 @@ initdb --locale=sv_SE
<entry><literal>WIN1252</literal></entry>
<entry>Windows CP1252</entry>
<entry>Western European</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry></entry>
</row>
@@ -575,6 +608,7 @@ initdb --locale=sv_SE
<entry><literal>WIN1256</literal></entry>
<entry>Windows CP1256</entry>
<entry>Arabic</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry></entry>
</row>
@@ -582,6 +616,7 @@ initdb --locale=sv_SE
<entry><literal>WIN1258</literal></entry>
<entry>Windows CP1258</entry>
<entry>Vietnamese</entry>
+ <entry>Yes</entry>
<entry>1</entry>
<entry><literal>ABC</>, <literal>TCVN</>, <literal>TCVN5712</>, <literal>VSCII</></entry>
</row>
@@ -703,12 +738,11 @@ $ <userinput>psql -l</userinput>
<para>
<productname>PostgreSQL</productname> supports automatic
character set conversion between server and client for certain
- character sets. The conversion information is stored in the
- <literal>pg_conversion</> system catalog. You can create a new
- conversion by using the SQL command <command>CREATE
- CONVERSION</command>. <productname>PostgreSQL</> comes with some
- predefined conversions. They are listed in <xref
- linkend="multibyte-translation-table">.
+ character set combinations. The conversion information is stored in the
+ <literal>pg_conversion</> system catalog. <productname>PostgreSQL</>
+ comes with some predefined conversions, as shown in <xref
+ linkend="multibyte-translation-table">. You can create a new
+ conversion using the SQL command <command>CREATE CONVERSION</command>.
</para>
<table id="multibyte-translation-table">
@@ -1029,7 +1063,8 @@ char *pg_encoding_to_char(int <replaceable>encoding_id</replaceable>);
SET CLIENT_ENCODING TO '<replaceable>value</>';
</programlisting>
- Also you can use the more standard SQL syntax <literal>SET NAMES</literal> for this purpose:
+ You can also use the standard SQL syntax <literal>SET NAMES</literal>
+ for this purpose:
<programlisting>
SET NAMES '<replaceable>value</>';
@@ -1078,10 +1113,8 @@ RESET client_encoding;
If the conversion of a particular character is not possible
&mdash; suppose you chose <literal>EUC_JP</literal> for the
server and <literal>LATIN1</literal> for the client, then some
- Japanese characters cannot be converted to
- <literal>LATIN1</literal> &mdash; it is transformed to its
- hexadecimal byte values in parentheses, e.g.,
- <literal>(826C)</literal>.
+ Japanese characters do not have a representation in
+ <literal>LATIN1</literal> &mdash; then an error is reported.
</para>
<para>