diff options
Diffstat (limited to 'Expat/encoding.h')
-rw-r--r-- | Expat/encoding.h | 91 |
1 files changed, 91 insertions, 0 deletions
diff --git a/Expat/encoding.h b/Expat/encoding.h new file mode 100644 index 0000000..4e0374b --- /dev/null +++ b/Expat/encoding.h @@ -0,0 +1,91 @@ +/***************************************************************** +** encoding.h +** +** Copyright 1998 Clark Cooper +** All rights reserved. +** +** This program is free software; you can redistribute it and/or +** modify it under the same terms as Perl itself. +*/ + +#ifndef ENCODING_H +#define ENCODING_H 1 + +#define ENCMAP_MAGIC 0xfeebface + +typedef struct prefixmap { + unsigned char min; + unsigned char len; /* 0 => 256 */ + unsigned short bmap_start; + unsigned char ispfx[32]; + unsigned char ischar[32]; +} PrefixMap; + +typedef struct encinf +{ + unsigned short prefixes_size; + unsigned short bytemap_size; + int firstmap[256]; + PrefixMap *prefixes; + unsigned short *bytemap; +} Encinfo; + +typedef struct encmaphdr +{ + unsigned int magic; + char name[40]; + unsigned short pfsize; + unsigned short bmsize; + int map[256]; +} Encmap_Header; + +/*================================================================ +** Structure of Encoding map binary encoding +** +** Note that all shorts and ints are in network order, +** so when packing or unpacking with perl, use 'n' and 'N' respectively. +** In C, use the htonl family of functions. +** +** The basic structure is: +** +** _______________________ +** |Header (including map expat needs for 1st byte) +** |PrefixMap * pfsize +** | This section isn't included for single-byte encodings. +** | For multiple byte encodings, when a byte represents a prefix +** | then it indexes into this vector instead of mapping to a +** | Unicode character. The PrefixMap type is declared above. The +** | ispfx and ischar fields are bitvectors indicating whether +** | the byte being mapped is a prefix or character respectively. +** | If neither is set, then the character is not mapped to Unicode. +** | +** | The min field is the 1st byte mapped for this prefix; the +** | len field is the number of bytes mapped; and bmap_start is +** | the starting index of the map for this prefix in the overall +** | map (next section). +** |unsigned short * bmsize +** | This section also is omitted for single-byte encodings. +** | Each short is either a Unicode scalar or an index into the +** | PrefixMap vector. +** +** The header for these files is declared above as the Encmap_Header type. +** The magic field is a magic number which should match the ENCMAP_MAGIC +** macro above. The next 40 bytes stores IANA registered name for the +** encoding. The pfsize field holds the number of PrefixMaps, which should +** be zero for single byte encodings. The bmsize field holds the number of +** shorts used for the overall map. +** +** The map field contains either the Unicode scalar encoded by the 1st byte +** or -n where n is the number of bytes that such a 1st byte implies (Expat +** requires that the number of bytes to encode a character is indicated by +** the 1st byte) or -1 if the byte doesn't map to any Unicode character. +** +** If the encoding is a multiple byte encoding, then there will be PrefixMap +** and character map sections. The 1st PrefixMap (index 0), covers a range +** of bytes that includes all 1st byte prefixes. +** +** Look at convert_to_unicode in Expat.xs to see how this data structure +** is used. +*/ + +#endif /* ndef ENCODING_H */ |