#-----------------------------------------------------------------------

=head1 NAME

Locale::Language - ISO two letter codes for language identification (ISO 639)

=head1 SYNOPSIS

    use Locale::Language;

    $lang = code2language('en');        # $lang gets 'English'
    $code = language2code('French');    # $code gets 'fr'

    @codes = all_language_codes();
    @names = all_language_names();

=cut

#-----------------------------------------------------------------------

package Locale::Language;
use strict;
require 5.002;

#-----------------------------------------------------------------------

=head1 DESCRIPTION

The C<Locale::Language> module provides access to the ISO two-letter
codes for identifying languages, as defined in ISO 639.
You can either access the codes via the
L<conversion routines|/CONVERSION ROUTINES> (described below),
or with the two functions which return lists of all language codes
or all language names.

=cut

#-----------------------------------------------------------------------

require Exporter;

#-----------------------------------------------------------------------
#	Public Global Variables
#-----------------------------------------------------------------------
# 'use vars' (rather than 'our') is kept on purpose: the module declares
# compatibility with perl 5.002, which predates 'our'.
use vars qw($VERSION @ISA @EXPORT);
$VERSION = sprintf("%d.%02d", q$Revision: 1.6 $ =~ /(\d+)\.(\d+)/);
@ISA     = qw(Exporter);
@EXPORT  = qw(code2language language2code
              all_language_codes all_language_names);

#-----------------------------------------------------------------------
#	Private Global Variables
#-----------------------------------------------------------------------
my %CODES     = ();    # lower-case two-letter code => language name
my %LANGUAGES = ();    # lower-cased language name  => two-letter code

#=======================================================================

=head1 CONVERSION ROUTINES

There are two conversion routines: C<code2language()> and
C<language2code()>.

=over 8

=item code2language()

This function takes a two letter language code and returns a string
which contains the name of the language identified. If the code is
not a valid language code, as defined by ISO 639, then C<undef>
will be returned.

    $lang = code2language($code);

=item language2code()

This function takes a language name and returns the corresponding
two letter language code, if such exists.
If the argument could not be identified as a language name,
then C<undef> will be returned.

    $code = language2code('French');

The case of the language name is not important.
See the section L</KNOWN BUGS AND LIMITATIONS> below.

=back

=cut

#=======================================================================

# code2language(CODE)
#   CODE    - two-letter language code; matched case-insensitively.
#   Returns - the language name, or undef if CODE is undefined or is
#             not present in the table loaded from __DATA__.
sub code2language
{
    my $code = shift;

    # An explicit undef (rather than a bare return) is the documented
    # contract, so it is preserved even for list-context callers.
    return undef unless defined $code;

    $code = lc($code);
    return exists $CODES{$code} ? $CODES{$code} : undef;
}

# language2code(NAME)
#   NAME    - language name; matched case-insensitively.
#   Returns - the two-letter code, or undef if NAME is undefined or is
#             not present in the table loaded from __DATA__.
sub language2code
{
    my $lang = shift;

    return undef unless defined $lang;

    $lang = lc($lang);
    return exists $LANGUAGES{$lang} ? $LANGUAGES{$lang} : undef;
}

#=======================================================================

=head1 QUERY ROUTINES

There are two functions which can be used to obtain a list of all
language codes, or all language names:

=over 8

=item C<all_language_codes()>

Returns a list of all two-letter language codes.
The codes are guaranteed to be all lower-case,
and not in any particular order.

=item C<all_language_names()>

Returns a list of all language names for which there is a corresponding
two-letter language code. The names are capitalised, and not returned
in any particular order.

=back

=cut

#=======================================================================

# Returns every known two-letter code (the keys of %CODES), unordered.
sub all_language_codes
{
    return keys %CODES;
}

# Returns every known language name (the values of %CODES), unordered.
sub all_language_names
{
    return values %CODES;
}

#-----------------------------------------------------------------------

=head1 EXAMPLES

The following example illustrates use of the C<code2language()> function.
The user is prompted for a language code, and then told the corresponding
language name:

    $| = 1;    # turn off buffering

    print "Enter language code: ";
    chomp($code = <STDIN>);
    $lang = code2language($code);
    if (defined $lang)
    {
        print "$code = $lang\n";
    }
    else
    {
        print "'$code' is not a valid language code!\n";
    }

=head1 KNOWN BUGS AND LIMITATIONS

=over 4

=item *

In the current implementation, all data is read in when the
module is loaded, and then held in memory.
A lazy implementation would be more memory friendly.

=item *

Currently just supports the two letter language codes -
there are also three-letter codes, and numbers.
Would these be of any use to anyone?

=back

=head1 SEE ALSO

=over 4

=item Locale::Country

ISO codes for identification of country (ISO 3166).
Supports 2-letter, 3-letter, and numeric country codes.

=item Locale::Currency

ISO three letter codes for identification of currencies
and funds (ISO 4217).

=item ISO 639:1988 (E/F)

Code for the representation of names of languages.

=item http://lcweb.loc.gov/standards/iso639-2/langhome.html

Home page for ISO 639-2

=back

=head1 AUTHOR

Neil Bowers E<lt>neilb@cre.canon.co.ukE<gt>

=head1 COPYRIGHT

Copyright (c) 1997-2001 Canon Research Centre Europe (CRE).

This module is free software; you can redistribute it and/or
modify it under the same terms as Perl itself.

=cut

#-----------------------------------------------------------------------

#=======================================================================
# initialisation code - stuff the DATA into the CODES hash
#=======================================================================
{
    # Read into a lexical instead of $_ so that loading this module
    # cannot overwrite a value the caller is holding in the global $_.
    while (defined(my $line = <DATA>))
    {
        # Strip the line terminator and any trailing whitespace.
        # chop() would remove the last character unconditionally, which
        # truncates the final name if the file lacks a trailing newline,
        # and leaves a stray "\r" behind on CRLF files.
        $line =~ s/\s+$//;
        next unless $line =~ /\S/;

        # Limit of 2: only the first ':' separates code from name.
        my ($code, $language) = split(/:/, $line, 2);

        # Ignore malformed lines rather than storing undef/empty entries.
        next unless defined $language && length($code) && length($language);

        $CODES{$code}              = $language;
        $LANGUAGES{lc($language)}  = $code;
    }

    # The table is fully in memory now; release the file handle.
    close(DATA);
}

1;

__DATA__
aa:Afar
ab:Abkhazian
ae:Avestan
af:Afrikaans
am:Amharic
ar:Arabic
as:Assamese
ay:Aymara
az:Azerbaijani
ba:Bashkir
be:Belarusian
bg:Bulgarian
bh:Bihari
bi:Bislama
bn:Bengali
bo:Tibetan
br:Breton
bs:Bosnian
ca:Catalan
ce:Chechen
ch:Chamorro
co:Corsican
cs:Czech
cu:Church Slavic
cv:Chuvash
cy:Welsh
da:Danish
de:German
dz:Dzongkha
el:Greek
en:English
eo:Esperanto
es:Spanish
et:Estonian
eu:Basque
fa:Persian
fi:Finnish
fj:Fijian
fo:Faeroese
fr:French
fy:Frisian
ga:Irish
gd:Gaelic (Scots)
gl:Gallegan
gn:Guarani
gu:Gujarati
gv:Manx
ha:Hausa
he:Hebrew
hi:Hindi
ho:Hiri Motu
hr:Croatian
hu:Hungarian
hy:Armenian
hz:Herero
ia:Interlingua
id:Indonesian
ie:Interlingue
ik:Inupiaq
is:Icelandic
it:Italian
iu:Inuktitut
ja:Japanese
jw:Javanese
ka:Georgian
ki:Kikuyu
kj:Kuanyama
kk:Kazakh
kl:Kalaallisut
km:Khmer
kn:Kannada
ko:Korean
ks:Kashmiri
ku:Kurdish
kv:Komi
kw:Cornish
ky:Kirghiz
la:Latin
lb:Letzeburgesch
ln:Lingala
lo:Lao
lt:Lithuanian
lv:Latvian
mg:Malagasy
mh:Marshall
mi:Maori
mk:Macedonian
ml:Malayalam
mn:Mongolian
mo:Moldavian
mr:Marathi
ms:Malay
mt:Maltese
my:Burmese
na:Nauru
nb:Norwegian Bokmål
nd:Ndebele, North
ne:Nepali
ng:Ndonga
nl:Dutch
nn:Norwegian Nynorsk
no:Norwegian
nr:Ndebele, South
nv:Navajo
ny:Chichewa; Nyanja
oc:Occitan (post 1500)
om:Oromo
or:Oriya
os:Ossetian; Ossetic
pa:Panjabi
pi:Pali
pl:Polish
ps:Pushto
pt:Portuguese
qu:Quechua
rm:Rhaeto-Romance
rn:Rundi
ro:Romanian
ru:Russian
rw:Kinyarwanda
sa:Sanskrit
sc:Sardinian
sd:Sindhi
se:Sami
sg:Sango
si:Sinhalese
sk:Slovak
sl:Slovenian
sm:Samoan
sn:Shona
so:Somali
sq:Albanian
sr:Serbian
ss:Swati
st:Sotho
su:Sundanese
sv:Swedish
sw:Swahili
ta:Tamil
te:Telugu
tg:Tajik
th:Thai
ti:Tigrinya
tk:Turkmen
tl:Tagalog
tn:Tswana
to:Tonga
tr:Turkish
ts:Tsonga
tt:Tatar
tw:Twi
ug:Uighur
uk:Ukrainian
ur:Urdu
uz:Uzbek
vi:Vietnamese
vo:Volapük
wo:Wolof
xh:Xhosa
yi:Yiddish
yo:Yoruba
za:Zhuang
zh:Chinese
zu:Zulu