diff options
author | Dave Beckett <dave@dajobe.org> | 2004-08-11 22:16:38 +0000 |
---|---|---|
committer | Dave Beckett <dave@dajobe.org> | 2004-08-11 22:16:38 +0000 |
commit | 445b91bf24f754d5055548d517fc9391b06693fc (patch) | |
tree | 2430fff82d630a2e4ae33812a5e6cb2c79b15153 /src/raptor_nfc_test.c | |
parent | a2c6848588cfa1de2cba795ad370e5e15e94fc46 (diff) | |
download | raptor-445b91bf24f754d5055548d517fc9391b06693fc.tar.gz |
Raptor NFC test
Diffstat (limited to 'src/raptor_nfc_test.c')
-rw-r--r-- | src/raptor_nfc_test.c | 243 |
1 files changed, 243 insertions, 0 deletions
diff --git a/src/raptor_nfc_test.c b/src/raptor_nfc_test.c new file mode 100644 index 00000000..eab50933 --- /dev/null +++ b/src/raptor_nfc_test.c @@ -0,0 +1,243 @@ +/* -*- Mode: c; c-basic-offset: 2 -*- + * + * raptor_nfc_test.c - Raptor Unicode NFC validation check + * + * $Id$ + * + * Copyright (C) 2004 David Beckett - http://purl.org/net/dajobe/ + * Institute for Learning and Research Technology - http://www.ilrt.org/ + * University of Bristol - http://www.bristol.ac.uk/ + * + * This package is Free Software or Open Source available under the + * following licenses (these are alternatives): + * 1. GNU Lesser General Public License (LGPL) + * 2. GNU General Public License (GPL) + * 3. Mozilla Public License (MPL) + * + * See LICENSE.html or LICENSE.txt at the top of this package for the + * full license terms. + * + */ + + +#ifdef HAVE_CONFIG_H +#include <raptor_config.h> +#endif + +#ifdef WIN32 +#include <win32_raptor_config.h> +#endif + +#include <stdio.h> +#include <string.h> +#include <ctype.h> /* for isprint() */ +#include <stdarg.h> +#ifdef HAVE_ERRNO_H +#include <errno.h> +#endif + +/* Raptor includes */ +#include "raptor.h" +#include "raptor_internal.h" +#include "raptor_nfc.h" + + +#undef RAPTOR_NFC_DECODE_DEBUG + + +/** + * utf8_string: destination utf8 buffer (FIXME big enough!) + * unicode_string: first char of string + * end: last char of unicode_string + */ +static int +decode_to_utf8(unsigned char *utf8_string, size_t utf8_string_length, + const char *unicode_string, const char *end) +{ + unsigned char *u=utf8_string; + const char *p=unicode_string; + +#ifdef RAPTOR_NFC_DECODE_DEBUG + fputs("decode_to_utf8: string '", stderr); + fwrite(unicode_string, sizeof(char), (end-unicode_string)+1, stderr); + fputs("' converts to:\n ", stderr); +#endif + + while(p < end) { + unsigned long c=0; + char *endptr; + + if(*p == ' ') { + p++; + continue; + } + + c=(unsigned long)strtol(p, &endptr, 16); + +#ifdef RAPTOR_NFC_DECODE_DEBUG + fprintf(stderr, "U+%04lX ", c); +#endif + + p=(unsigned char*)endptr; + + u+= raptor_unicode_char_to_utf8(c, u); + + if((u-utf8_string) > utf8_string_length) { + fprintf(stderr, + "decode_to_utf8 overwote utf8_string buffer at byte %d\n", + (u-utf8_string)); + abort(); + } + } + +#ifdef RAPTOR_NFC_DECODE_DEBUG + fputs("\n", stderr); +#endif + + return u-utf8_string; +} + + + +static void +utf8_print(const unsigned char *input, int length, FILE *stream) +{ + int i=0; + + while(i<length && *input) { + unsigned long c; + int size=raptor_utf8_to_unicode_char(&c, input, length-i); + if(size <= 0) + return; + if(i) + fputc(' ', stream); + fprintf(stream, "U+%04X", (int)c); + input += size; + i += size; + } +} + + +int +main (int argc, char *argv[]) +{ + char *program; + static const char *filename="NormalizationTest.txt"; + FILE *fh; + int rc=0; + unsigned int line=1; + size_t max_c2_len=0; + size_t max_c4_len=0; + int passes=0; + int fails=0; + + program=argv[0]; + if(1) { + char *p; + if((p=strrchr(program, '/'))) + program=p+1; + else if((p=strrchr(program, '\\'))) + program=p+1; + argv[0]=program; + } + + + fh=fopen(filename, "r"); + if(!fh) { + fprintf(stderr, "%s: file '%s' open failed - %s\n", + program, filename, strerror(errno)); + return 1; + } + +#define LINE_BUFFER_SIZE 1024 + +/* FIXME big enough for Unicode 4 (c2 max 16; c4 max 33) */ +#define UNISTR_SIZE 40 + + for(;!feof(fh); line++) { + char buffer[LINE_BUFFER_SIZE]; + char *p, *start; + unsigned char nfc1[UNISTR_SIZE]; + unsigned char nfc2[UNISTR_SIZE]; + size_t nfc1_len, nfc2_len; + int nfc_rc; + int error; + + p=fgets(buffer, LINE_BUFFER_SIZE, fh); + if(!p) { + if(ferror(fh)) { + fprintf(stderr, "%s: file '%s' read failed - %s\n", + program, filename, strerror(errno)); + rc=1; + break; + } + /* assume feof */ + break; + }; + +#if 0 + fprintf(stderr, "%s:%d: line '%s'\n", program, line, buffer); +#endif + + /* skip lines */ + if(*p == '@' || *p == '#') + continue; + + /* skip column 1 */ + while(*p++ != ';') + ; + + start=p; + /* find end column 2 */ + while(*p++ != ';') + ; + + nfc1_len=decode_to_utf8(nfc1, UNISTR_SIZE, start, p-1); + if(nfc1_len > max_c2_len) + max_c2_len=nfc1_len; + + /* skip column 3 */ + while(*p++ != ';') + ; + + start=p; + /* find end column 4 */ + while(*p++ != ';') + ; + + nfc2_len=decode_to_utf8(nfc2, UNISTR_SIZE, start, p-1); + if(nfc2_len > max_c4_len) + max_c4_len=nfc2_len; + + nfc_rc=raptor_nfc_check(nfc1, nfc1_len, &error); + if(!nfc_rc) { + fprintf(stderr, "%s:%d: NFC check 1 failed on: '", filename, line); + utf8_print(nfc1, nfc1_len, stderr); + fprintf(stderr, "' at byte %d of %d\n", error, (int)nfc1_len); + fails++; + } else + passes++; + + if(nfc1_len == nfc2_len && !memcmp(nfc1, nfc2, nfc1_len)) + continue; + + nfc_rc=raptor_nfc_check(nfc2, nfc2_len, &error); + if(!nfc_rc) { + fprintf(stderr, "%s:%d: NFC check 2 failed on: '", filename, line); + utf8_print(nfc2, nfc2_len, stderr); + fprintf(stderr, "' at byte %d of %d\n", error, (int)nfc2_len); + fails++; + } else + passes++; + + } + + fclose(fh); + + fprintf(stderr, "%s: max c2 len: %d, max c4 len: %d\n", program, + (int)max_c2_len, (int)max_c4_len); + fprintf(stderr, "%s: passes: %d fails: %d\n", program, + passes, fails); + + return rc; +} |