summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorxhe <xw897002528@gmail.com>2018-11-28 15:26:53 +0800
committerrofl0r <retnyg@gmx.net>2019-01-16 02:38:18 +0000
commita63618cdb119891c2b7923de87c21125286d7691 (patch)
treec7989d9c8ac55304b502c7397cbcab72866d6cfb
parent98bd054905c761ae48b1c912a797e45986870d35 (diff)
downloadgettext-tiny-a63618cdb119891c2b7923de87c21125286d7691.tar.gz
complete rewrite of poparser, msgmerge/msgfmt ported
as stated in https://github.com/sabotage-linux/gettext-tiny/issues/39, the old parser is not good enough to handle all the po files. Similiar issues occurred over and over again because our dirty hacks in the project. so, i propose and implement this new parser, which: 1. needs to parse a po file two times. the first time will acquire the maximum width of every entry. the second time will copy the well-prepared contents into struct po_message, and pass it to the callback function. 2. every struct po_message contains all the information of one translation: msgid, msgctxt, msgid_plural, and msgstrs. comments may be added later. the logic of code is quite simple, nothing special need to explain. the special points are: 1. the first time, new parser gives no infomation about what the string is like. neither will the new parser give the exact size(sysdeped), nor you can calculate the exact size on your own. only xxx_len, strlen, sy- sdep in po_message_t is available. xxx_len is the length of the corressponding entry, strlen is almost the same. 2. sysdep present how many cases the string could be expanded to. since you know the length of the original string and the original string is always longer than the converted one, you can get a safe buffer size to work at the second stage. 3. poparser_sysdep(), a function like unescape(), with a bit flag as the third argument. that is, three bits correspond to st_priu32, st_priu64, st_priumax. since there're only up to two cases for every kind of sysdep, you could count from 0 to msg->sysdep-1, and poparser_sysdep will iterate every possible case eventually.
-rw-r--r--src/msgfmt.c566
-rw-r--r--src/msgmerge.c91
-rw-r--r--src/poparser.c541
-rw-r--r--src/poparser.h100
4 files changed, 669 insertions, 629 deletions
diff --git a/src/msgfmt.c b/src/msgfmt.c
index c5d43d7..98a9b88 100644
--- a/src/msgfmt.c
+++ b/src/msgfmt.c
@@ -1,28 +1,21 @@
/* msgfmt utility (C) 2012 rofl0r
* released under the MIT license, see LICENSE for details */
+#define _BSD_SOURCE
#include <stdio.h>
#include <stdlib.h>
+#include <stdint.h>
#include <string.h>
#include <ctype.h>
+#include <limits.h>
#include <assert.h>
#include "poparser.h"
-// in DO_NOTHING mode, we simply write the msgid twice, once for msgid, once for msgstr.
-// TODO: maybe make it write "" instead of echoing the msgid.
-//#define DO_NOTHING
-
-__attribute__((noreturn))
static void syntax(void) {
- fprintf(stdout,
- "Usage: msgfmt [OPTION] filename.po ...\n");
- exit(1);
+ fprintf(stdout, "Usage: msgfmt [OPTION] filename.po ...\n");
}
-__attribute__((noreturn))
static void version(void) {
- fprintf(stdout,
- "msgfmt (GNU gettext-tools compatible) 99.9999.9999\n");
- exit(0);
+ fprintf(stdout, "msgfmt (GNU gettext-tools compatible) 99.9999.9999\n");
}
#define streq(A, B) (!strcmp(A, B))
@@ -56,314 +49,114 @@ const struct mo_hdr def_hdr = {
0,
};
-
-// pass 0: collect numbers of strings, calculate size and offsets for tables
-// print header
-// pass 1: create in-memory string tables
-enum passes {
- pass_first = 0,
- pass_collect_sizes = pass_first,
- pass_second,
- pass_max,
-};
-
struct strtbl {
- unsigned len, off;
+ uint32_t len, off;
};
struct strmap {
- struct strtbl str, *trans;
+ struct strtbl str;
+ struct strtbl trans;
};
struct callbackdata {
- enum passes pass;
- unsigned off;
FILE* out;
- unsigned msgidbuf1_len;
- unsigned msgidbuf2_len;
- unsigned pluralbuf1_len;
- unsigned pluralbuf2_len;
- unsigned ctxtbuf_len;
- unsigned msgstr1_len;
- unsigned msgstr2_len;
- unsigned pluralstr_count;
- unsigned string_maxlen;
- char* msgidbuf1;
- char* msgidbuf2;
- char* pluralbuf1;
- char* pluralbuf2;
- char* msgctxtbuf;
- char* msgstrbuf1;
- char* msgstrbuf2;
- unsigned priv_type;
- unsigned priv_len;
- unsigned num[pe_maxstr];
- unsigned len[pe_maxstr];
- struct strmap *strlist;
- struct strtbl *translist;
- char *strbuffer[pe_maxstr];
- unsigned stroff[pe_maxstr];
- unsigned curr[pe_maxstr];
+ enum po_stage stage;
+ size_t cnt;
+ size_t len[2];
+ char* buf[2];
+ struct strmap *list;
};
static struct callbackdata *cb_for_qsort;
-int strmap_comp(const void *a_, const void *b_) {
+int strtbl_cmp(const void *a_, const void *b_) {
const struct strmap *a = a_, *b = b_;
- return strcmp(cb_for_qsort->strbuffer[0] + a->str.off, cb_for_qsort->strbuffer[0] + b->str.off);
+ return strcmp(cb_for_qsort->buf[0] + a->str.off, cb_for_qsort->buf[0] + b->str.off);
}
-enum sysdep_types {
- st_priu32 = 0,
- st_priu64,
- st_priumax,
- st_max
-};
+int process_line_callback(po_message_t msg, void* user) {
+ struct callbackdata *d = (struct callbackdata *) user;
+ struct strtbl *str, *trans;
+ size_t m;
+ int cnt[st_max] = {0};
+ int i, k;
-static const char sysdep_str[][10]={
- [st_priu32] = "\x08<PRIu32>",
- [st_priu64] = "\x08<PRIu64>",
- [st_priumax] = "\x09<PRIuMAX>",
-};
-static const char sysdep_repl[][8]={
- [st_priu32] = "\x02lu\0u",
- [st_priu64] = "\x02lu\0llu",
- [st_priumax] = "\x01ju"
-};
-static const char *get_repl(enum sysdep_types type, unsigned nr) {
- assert(nr < (unsigned)sysdep_repl[type][0]);
- const char* p = sysdep_repl[type]+1;
- while(nr--) p+=strlen(p)+1;
- return p;
-}
-static void replace(char* text, unsigned textlen, const char* what, const char * with) {
- char*p = text;
- size_t la = strlen(what), li=strlen(with);
- assert(la >= li);
- for(p=text;textlen >= la;) {
- if(!memcmp(p,what,la)) {
- memcpy(p, with, li);
- textlen -= la;
- memmove(p+li,p+la,textlen+1);
- p+=li;
- } else {
- p++;
- textlen--;
- }
- }
-}
-static unsigned get_form(enum sysdep_types type, unsigned no, unsigned occurences[st_max]) {
- unsigned i,divisor = 1;
- for(i=type+1;i<st_max;i++) if(occurences[i]) divisor *= sysdep_repl[i][0];
- return (no/divisor)%sysdep_repl[type][0];
-}
-static char** sysdep_transform(const char* text, unsigned textlen, unsigned *len, unsigned *count, int simulate) {
- unsigned occurences[st_max] = {0};
- const char *p=text,*o;
- unsigned i,j, l = textlen;
- while(l && (o=strchr(p, '<'))) {
- l-=o-p;p=o;
- unsigned f = 0;
- for(i=0;i<st_max;i++)
- if(l>=(unsigned)sysdep_str[i][0] && !memcmp(p,sysdep_str[i]+1,sysdep_str[i][0])) {
- occurences[i]++;
- f=1;
- p+=sysdep_str[i][0];
- l-=sysdep_str[i][0];
- break;
- }
- if(!f) p++,l--;
- }
- *count = 1;
- for(i=0;i<st_max;i++) if(occurences[i]) *count *= sysdep_repl[i][0];
- l = textlen * *count;
- for(i=0;i<*count;i++) for(j=0;j<st_max;j++)
- if(occurences[j]) l-= occurences[j] * (sysdep_str[j][0] - strlen(get_repl(j, get_form(j, i, occurences))));
- *len = l+*count-1;
-
- char **out = 0;
- if(!simulate) {
- out = malloc((sizeof(char*)+textlen+1) * *count);
- assert(out);
- char *p = (void*)(out+*count);
- for(i=0;i<*count;i++) {
- out[i]=p;
- memcpy(p, text, textlen+1);
- p+=textlen+1;
- }
- for(i=0;i<*count;i++) for(j=0;j<st_max;j++)
- if(occurences[j])
- replace(out[i], textlen, sysdep_str[j]+1, get_repl(j, get_form(j, i, occurences)));
- }
+ if (msg->flags & PO_FUZZY) return 0;
+ if (msg->strlen[0] == 0) return 0;
- return out;
-}
+ switch(d->stage) {
+ case ps_size:
+ k = 1;
+ for (i=0; i < st_max; i++)
+ if (msg->sysdep[i])
+ k *= msg->sysdep[i];
-static void error(const char* msg) {
- fprintf(stderr, msg);
- exit(1);
-}
+ d->len[0] += (msg->id_len + 1)*k;
-static inline void writemsg(struct callbackdata *d) {
- if(d->msgidbuf1_len != 0) {
- if(!d->strlist[d->curr[pe_msgid]].str.off)
- d->strlist[d->curr[pe_msgid]].str.off=d->stroff[pe_msgid];
+ if (msg->plural_len)
+ d->len[0] += (msg->plural_len + 1)*k;
- if(d->ctxtbuf_len != 0) {
- memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgctxtbuf, d->ctxtbuf_len);
- d->strlist[d->curr[pe_msgid]].str.len+=d->ctxtbuf_len;
- d->stroff[pe_msgid]+=d->ctxtbuf_len;
- }
- memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgidbuf1, d->msgidbuf1_len);
- d->stroff[pe_msgid]+=d->msgidbuf1_len;
- d->strlist[d->curr[pe_msgid]].str.len+=d->msgidbuf1_len-1;
- if(d->pluralbuf1_len != 0) {
- memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->pluralbuf1, d->pluralbuf1_len);
- d->strlist[d->curr[pe_msgid]].str.len+=d->pluralbuf1_len;
- d->stroff[pe_msgid]+=d->pluralbuf1_len;
- }
- d->curr[pe_msgid]++;
- }
- if(d->msgidbuf2_len != 0) {
- if(!d->strlist[d->curr[pe_msgid]].str.off)
- d->strlist[d->curr[pe_msgid]].str.off=d->stroff[pe_msgid];
-
- if(d->ctxtbuf_len != 0) {
- memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgctxtbuf, d->ctxtbuf_len);
- d->strlist[d->curr[pe_msgid]].str.len+=d->ctxtbuf_len;
- d->stroff[pe_msgid]+=d->ctxtbuf_len;
- }
- memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->msgidbuf2, d->msgidbuf2_len);
- d->stroff[pe_msgid]+=d->msgidbuf2_len;
- d->strlist[d->curr[pe_msgid]].str.len+=d->msgidbuf2_len-1;
- if(d->pluralbuf2_len != 0) {
- memcpy(d->strbuffer[pe_msgid] + d->stroff[pe_msgid], d->pluralbuf2, d->pluralbuf2_len);
- d->strlist[d->curr[pe_msgid]].str.len+=d->pluralbuf2_len;
- d->stroff[pe_msgid]+=d->pluralbuf2_len;
- }
- d->curr[pe_msgid]++;
- }
+ if (msg->ctxt_len)
+ d->len[0] += (msg->ctxt_len + 1)*k;
- d->pluralbuf2_len=d->pluralbuf1_len=d->ctxtbuf_len=d->msgidbuf1_len=d->msgidbuf2_len=0;
-}
+ for (i=0; msg->strlen[i]; i++)
+ d->len[1] += (msg->strlen[i] + 1)*k;
-static inline void writestr(struct callbackdata *d, struct po_info *info) {
- // msgid xx; msgstr ""; is widely happened, it's invalid
-
- // https://github.com/sabotage-linux/gettext-tiny/issues/1
- // no invalid, when empty, check d->num[pe_msgid]
- if(!d->pluralstr_count && d->num[pe_msgid] > 0) {
- d->len[pe_msgid]-=d->msgidbuf1_len;
- d->len[pe_msgid]-=d->msgidbuf2_len;
- d->len[pe_plural]-=d->pluralbuf1_len;
- d->len[pe_plural]-=d->pluralbuf2_len;
- d->len[pe_ctxt]-=d->ctxtbuf_len;
- d->len[pe_msgstr]--;
- d->num[pe_msgid]--;
- d->num[pe_msgstr]--;
- d->pluralbuf2_len=d->pluralbuf1_len=d->ctxtbuf_len=d->msgidbuf1_len=d->msgidbuf2_len=d->msgstr1_len=d->msgstr2_len=d->pluralstr_count=0;
- return;
- }
+ d->cnt += k;
+ break;
+ case ps_parse:
+ for (k=1; k; d->cnt++) {
+ k = 0;
- if(d->pluralstr_count && d->pluralstr_count <= info->nplurals) {
- writemsg(d);
- // plural <= nplurals is allowed
- d->translist[d->curr[pe_msgstr]].len=d->msgstr1_len-1;
- d->translist[d->curr[pe_msgstr]].off=d->stroff[pe_msgstr];
- d->strlist[d->curr[pe_msgstr]].trans = &d->translist[d->curr[pe_msgstr]];
-
- memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], d->msgstrbuf1, d->msgstr1_len);
- d->stroff[pe_msgstr]+=d->msgstr1_len;
- d->curr[pe_msgstr]++;
-
- if(d->msgstr2_len) {
- d->translist[d->curr[pe_msgstr]].len=d->msgstr2_len-1;
- d->translist[d->curr[pe_msgstr]].off=d->stroff[pe_msgstr];
- d->strlist[d->curr[pe_msgstr]].trans = &d->translist[d->curr[pe_msgstr]];
-
- memcpy(d->strbuffer[pe_msgstr] + d->stroff[pe_msgstr], d->msgstrbuf2, d->msgstr2_len);
- d->stroff[pe_msgstr]+=d->msgstr2_len;
- d->curr[pe_msgstr]++;
- }
+ str = &d->list[d->cnt].str;
+ trans = &d->list[d->cnt].trans;
- d->msgstr1_len=d->msgstr2_len=d->pluralstr_count=0;
- }
-}
+ str->off = d->len[0];
+ str->len = 0;
-int process_line_callback(struct po_info* info, void* user) {
- struct callbackdata *d = (struct callbackdata *) user;
- assert(info->type == pe_msgid || info->type == pe_ctxt || info->type == pe_msgstr || info->type == pe_plural);
- char **sysdeps;
- unsigned len, count, i, l;
- switch(d->pass) {
- case pass_collect_sizes:
- sysdep_transform(info->text, info->textlen, &len, &count, 1);
- d->num[info->type] += count;
- if(info->type == pe_msgid && count == 2 && d->priv_type == pe_ctxt) {
- // ctxt meets msgid with sysdeps, multiply num and len to suit it
- d->len[pe_ctxt] += d->priv_len +1;
- d->num[pe_ctxt]++;
+ if (msg->ctxt_len) {
+ m = poparser_sysdep(msg->ctxt, &d->buf[0][d->len[0]], cnt);
+ str->len += m;
+ d->buf[0][d->len[0]+m-1] = 0x4;
+ d->len[0] += m;
+ }
+
+ m = poparser_sysdep(msg->id, &d->buf[0][d->len[0]], cnt);
+ str->len += m;
+ d->len[0] += m;
+
+ if (msg->plural_len) {
+ m = poparser_sysdep(msg->plural, &d->buf[0][d->len[0]], cnt);
+ str->len += m;
+ d->len[0] += m;
}
- if(count != 1 && info->type == pe_ctxt) {
- // except msgid, str, plural, all other types should not have sysdeps
- abort();
+
+ trans->off = d->len[1];
+ trans->len = 0;
+ for (i=0; msg->strlen[i]; i++) {
+ m = poparser_sysdep(msg->str[i], &d->buf[1][d->len[1]], cnt);
+ trans->len += m;
+ d->len[1] += m;
}
- d->priv_type = info->type;
- d->priv_len = len;
- d->len[info->type] += len +1;
-
- if(len+1 > d->string_maxlen)
- d->string_maxlen = len+1;
- break;
- case pass_second:
- sysdeps = sysdep_transform(info->text, info->textlen, &len, &count, 0);
- for(i=0;i<count;i++) {
- l = strlen(sysdeps[i]);
- assert(l+1 <= d->string_maxlen);
- if(info->type == pe_msgid) {
- if(i==0 && d->msgidbuf1_len)
- writestr(d, info);
-
- // just copy, it's written down when writemsg()
- if(i==0) {
- memcpy(d->msgidbuf1, sysdeps[i], l+1);
- d->msgidbuf1_len = l+1;
- } else {
- memcpy(d->msgidbuf2, sysdeps[i], l+1);
- d->msgidbuf2_len = l+1;
- }
- } else if(info->type == pe_plural) {
- if(i==0) {
- memcpy(d->pluralbuf1, sysdeps[i], l+1);
- d->pluralbuf1_len = l+1;
- } else {
- memcpy(d->pluralbuf2, sysdeps[i], l+1);
- d->pluralbuf2_len = l+1;
- }
- } else if(info->type == pe_ctxt) {
- writestr(d, info);
- d->ctxtbuf_len = l+1;
- memcpy(d->msgctxtbuf, sysdeps[i], l);
- d->msgctxtbuf[l] = 0x4;//EOT
- } else {
- // just copy, it's written down when writestr()
- if(l) {
- if(i==0) {
- memcpy(&d->msgstrbuf1[d->msgstr1_len], sysdeps[i], l+1);
- d->msgstr1_len += l+1;
- d->pluralstr_count++;
- } else {
- // sysdeps exist
- memcpy(&d->msgstrbuf2[d->msgstr2_len], sysdeps[i], l+1);
- d->msgstr2_len += l+1;
- }
+ for (i=0; i < st_max; i++) {
+ if (cnt[i] < msg->sysdep[i]) {
+ cnt[i]++;
+
+ // we have a carry
+ if (cnt[i] == msg->sysdep[i]) {
+ cnt[i] = 0;
+ continue;
}
+
+ k = 1;
+ break;
}
}
- free(sysdeps);
- break;
- default:
- abort();
+ }
+
+ break;
+ default:
+ abort();
}
return 0;
}
@@ -371,113 +164,88 @@ int process_line_callback(struct po_info* info, void* user) {
int process(FILE *in, FILE *out) {
struct mo_hdr mohdr = def_hdr;
char line[8192]; char *lp;
+ size_t off, i;
+ enum po_error t;
char convbuf[32768];
struct callbackdata d = {
- .num = {
- [pe_msgid] = 0,
- [pe_msgstr] = 0,
- [pe_plural] = 0,
- [pe_ctxt] = 0,
- },
- .len = {
- [pe_msgid] = 0,
- [pe_msgstr] = 0,
- [pe_plural] = 0,
- [pe_ctxt] = 0,
- },
- .off = 0,
+ .len = {0, 0},
+ .cnt = 0,
.out = out,
- .pass = pass_first,
- .ctxtbuf_len = 0,
- .pluralbuf1_len = 0,
- .pluralbuf2_len = 0,
- .msgidbuf1_len = 0,
- .msgidbuf2_len = 0,
- .msgstr1_len = 0,
- .msgstr2_len = 0,
- .pluralstr_count = 0,
- .string_maxlen = 0,
};
struct po_parser pb, *p = &pb;
mohdr.off_tbl_trans = mohdr.off_tbl_org;
- for(d.pass = pass_first; d.pass <= pass_second; d.pass++) {
- if(d.pass == pass_second) {
- // start of second pass:
- // ensure we dont output when there's no strings at all
- if(d.num[pe_msgid] == 0) {
- return 1;
- }
-
- // check that data gathered in first pass is consistent
- if((d.num[pe_msgstr] < d.num[pe_msgid]) || (d.num[pe_msgstr] > (d.num[pe_msgid] + d.num[pe_plural] * (p->info.nplurals - 1)))) {
- // one should actually abort here,
- // but gnu gettext simply writes an empty .mo and returns success.
- //abort();
- fprintf(stderr, "warning: mismatch of msgid/msgstr count, writing empty .mo file\n");
- d.num[pe_msgid] = 0;
- return 0;
- }
- d.msgidbuf1 = calloc(d.string_maxlen*5+2*d.string_maxlen*p->info.nplurals, 1);
- d.msgidbuf2 = d.msgidbuf1 + d.string_maxlen;
- d.pluralbuf1 = d.msgidbuf2 + d.string_maxlen;
- d.pluralbuf2 = d.pluralbuf1 + d.string_maxlen;
- d.msgctxtbuf = d.pluralbuf2 + d.string_maxlen;
- d.msgstrbuf1 = d.msgctxtbuf + d.string_maxlen;
- d.msgstrbuf2 = d.msgstrbuf1 + d.string_maxlen*p->info.nplurals;
-
- d.strlist = calloc(d.num[pe_msgid] * sizeof(struct strmap), 1);
- d.translist = calloc(d.num[pe_msgstr] * sizeof(struct strtbl), 1);
- d.strbuffer[pe_msgid] = calloc(d.len[pe_msgid]+d.len[pe_plural]+d.len[pe_ctxt], 1);
- d.strbuffer[pe_msgstr] = calloc(d.len[pe_msgstr], 1);
- d.stroff[pe_msgid] = d.stroff[pe_msgstr] = 0;
- assert(d.msgidbuf1 && d.strlist && d.translist && d.strbuffer[pe_msgid] && d.strbuffer[pe_msgstr]);
- }
-
- poparser_init(p, convbuf, sizeof(convbuf), process_line_callback, &d);
+ poparser_init(p, convbuf, sizeof(convbuf), process_line_callback, &d);
+ d.stage = p->stage;
- while((lp = fgets(line, sizeof(line), in))) {
- poparser_feed_line(p, lp, sizeof(line));
- }
- poparser_finish(p);
- if(d.pass == pass_second)
- writestr(&d, &p->info);
-
- if(d.pass == pass_second) {
- // calculate header fields from len and num arrays
- mohdr.numstring = d.num[pe_msgid];
- mohdr.off_tbl_org = sizeof(struct mo_hdr);
- mohdr.off_tbl_trans = mohdr.off_tbl_org + d.num[pe_msgid] * (sizeof(unsigned)*2);
- // set offset startvalue
- d.off = mohdr.off_tbl_trans + d.num[pe_msgid] * (sizeof(unsigned)*2);
+ while((lp = fgets(line, sizeof(line), in))) {
+ if ((t = poparser_feed_line(p, lp, strlen(line))) != po_success)
+ return t;
+ }
+ if ((t = poparser_finish(p)) != po_success)
+ return t;
+
+ if (d.cnt == 0) return -1;
+
+ d.list = (struct strmap*)malloc(sizeof(struct strmap)*d.cnt);
+ d.buf[0] = (char*)malloc(d.len[0]);
+ d.buf[1] = (char*)malloc(d.len[1]);
+ d.len[0] = 0;
+ d.len[1] = 0;
+ d.cnt = 0;
+ d.stage = p->stage;
+
+ fseek(in, 0, SEEK_SET);
+ while ((lp = fgets(line, sizeof(line), in))) {
+ if ((t = poparser_feed_line(p, lp, strlen(line))) != po_success) {
+ free(d.list);
+ free(d.buf[0]);
+ free(d.buf[1]);
+ return t;
}
- fseek(in, 0, SEEK_SET);
+ }
+ if ((t = poparser_finish(p)) != po_success) {
+ free(d.list);
+ free(d.buf[0]);
+ free(d.buf[1]);
+ return t;
}
cb_for_qsort = &d;
- qsort(d.strlist, d.num[pe_msgid], sizeof (struct strmap), strmap_comp);
- unsigned i;
+ qsort(d.list, d.cnt, sizeof(struct strmap), strtbl_cmp);
+ cb_for_qsort = NULL;
// print header
+ mohdr.numstring = d.cnt;
+ mohdr.off_tbl_org = sizeof(struct mo_hdr);
+ mohdr.off_tbl_trans = mohdr.off_tbl_org + d.cnt * sizeof(struct strtbl);
fwrite(&mohdr, sizeof(mohdr), 1, out);
- for(i = 0; i < d.num[pe_msgid]; i++) {
- d.strlist[i].str.off += d.off;
- fwrite(&d.strlist[i].str, sizeof(struct strtbl), 1, d.out);
+
+ off = mohdr.off_tbl_trans + d.cnt * sizeof(struct strtbl);
+ for (i = 0; i < d.cnt; i++) {
+ d.list[i].str.off += off;
+ fwrite(&d.list[i].str, sizeof(struct strtbl), 1, d.out);
}
- for(i = 0; i < d.num[pe_msgid]; i++) {
- d.strlist[i].trans->off += d.off + d.len[pe_msgid] + d.len[pe_plural] + d.len[pe_ctxt];
- fwrite(d.strlist[i].trans, sizeof(struct strtbl), 1, d.out);
+
+ off += d.len[0];
+ for (i = 0; i < d.cnt; i++) {
+ d.list[i].trans.off += off;
+ fwrite(&d.list[i].trans, sizeof(struct strtbl), 1, d.out);
}
- fwrite(d.strbuffer[pe_msgid], d.len[pe_msgid]+d.len[pe_plural]+d.len[pe_ctxt], 1, d.out);
- fwrite(d.strbuffer[pe_msgstr], d.len[pe_msgstr], 1, d.out);
+
+ fwrite(d.buf[0], d.len[0], 1, d.out);
+ fwrite(d.buf[1], d.len[1], 1, d.out);
+
+ free(d.list);
+ free(d.buf[0]);
+ free(d.buf[1]);
return 0;
}
-
void set_file(int out, char* fn, FILE** dest) {
if(streq(fn, "-")) {
if(out) {
@@ -505,11 +273,16 @@ void set_file(int out, char* fn, FILE** dest) {
}
int main(int argc, char**argv) {
- if(argc == 1) syntax();
+ if (argc == 1) {
+ syntax();
+ return 0;
+ }
+
int arg = 1;
FILE *out = NULL;
FILE *in = NULL;
int expect_in_fn = 1;
+ char path[PATH_MAX];
char* locale = NULL;
char* dest = NULL;
#define A argv[arg]
@@ -539,19 +312,20 @@ int main(int argc, char**argv) {
streq(A+2, "statistics") ||
strstarts(A+2, "check-accelerators=") ||
strstarts(A+2, "resource=")
- ) {
- } else if((dest = strstarts(A+2, "locale="))) {
- locale = dest;
- } else if((dest = strstarts(A+2, "output-file="))) {
- set_file(1, dest, &out);
- } else if(streq(A+2, "version")) {
- version();
- } else if(streq(A+2, "help")) {
- syntax();
- } else if (expect_in_fn) {
- set_file(0, A, &in);
- expect_in_fn = 0;
- }
+ ) {
+ } else if((locale = strstarts(A+2, "locale="))) {
+ } else if((dest = strstarts(A+2, "output-file="))) {
+ set_file(1, dest, &out);
+ } else if(streq(A+2, "version")) {
+ version();
+ return 0;
+ } else if(streq(A+2, "help")) {
+ syntax();
+ return 0;
+ } else if (expect_in_fn) {
+ set_file(0, A, &in);
+ expect_in_fn = 0;
+ }
} else if(streq(A + 1, "o")) {
arg++;
dest = A;
@@ -568,8 +342,10 @@ int main(int argc, char**argv) {
) {
} else if (streq(A+1, "V")) {
version();
+ return 0;
} else if (streq(A+1, "h")) {
syntax();
+ return 0;
} else if (streq(A+1, "l")) {
arg++;
locale = A;
@@ -587,14 +363,14 @@ int main(int argc, char**argv) {
}
if (locale != NULL && dest != NULL) {
- int sz = snprintf(NULL, 0, "%s/%s.msg", dest, locale);
- char msg[sz+1];
- snprintf(msg, sizeof(msg), "%s/%s.msg", dest, locale);
- FILE *fp = fopen(msg, "w");
+ snprintf(path, sizeof(path), "%s/%s.msg", dest, locale);
+ FILE *fp = fopen(path, "w");
if (fp) {
fclose(fp);
return 0;
- } else return 1;
+ }
+
+ return -1;
}
if(out == NULL) {
@@ -603,14 +379,16 @@ int main(int argc, char**argv) {
}
if(in == NULL || out == NULL) {
- return 1;
+ return -1;
}
+
int ret = process(in, out);
fflush(in); fflush(out);
+
if(in != stdin) fclose(in);
if(out != stdout) fclose(out);
- if (ret == 1) {
+ if (ret < 0) {
return remove(dest);
}
return ret;
diff --git a/src/msgmerge.c b/src/msgmerge.c
index 53bc90a..fe6dfdb 100644
--- a/src/msgmerge.c
+++ b/src/msgmerge.c
@@ -36,37 +36,58 @@ struct fiLes {
FILE *po;
FILE *pot;
FILE *compend;
- int plural_count;
- enum po_entry prev_type;
+
+ // the biggest length of a string
+ enum po_stage stage;
+ size_t len;
+ char *buf;
};
/* currently we only output input strings as output strings
* i.e. there is no translation lookup at all */
-int process_line_callback(struct po_info* info, void* user) {
- char convbuf[16384];
+int process_line_callback(po_message_t msg, void* user) {
struct fiLes* file = (struct fiLes*) user;
+ int i;
+ switch (file->stage) {
+ case ps_size:
+ if (msg->ctxt_len > file->len)
+ file->len = msg->ctxt_len + 1;
+
+ if (msg->id_len > file->len)
+ file->len = msg->id_len + 1;
+
+ if (msg->plural_len > file->len)
+ file->len = msg->plural_len + 1;
+
+ for (i=0; i < MAX_NPLURALS; i++)
+ if (msg->strlen[i] > file->len)
+ file->len = msg->strlen[i] + 1;
- // escape what is unescaped automatically by lib
- escape(info->text, convbuf, sizeof(convbuf));
- switch (info->type) {
- case pe_msgid:
- file->plural_count = 1;
- fprintf(file->out, "\nmsgid \"%s\"\n", convbuf);
- file->prev_type = info->type;
- break;
- case pe_ctxt:
- fprintf(file->out, "msgctxt \"%s\"\n", convbuf);
- break;
- case pe_plural:
- fprintf(file->out, "msgid_plural \"%s\"\n", convbuf);
- file->prev_type = info->type;
break;
- case pe_msgstr:
- if (file->prev_type == pe_plural) {
- fprintf(file->out, "msgstr[%d] \"%s\"\n", file->plural_count++, convbuf);
+ case ps_parse:
+ if (msg->ctxt_len) {
+ escape(msg->ctxt, file->buf, file->len);
+ fprintf(file->out, "msgctxt \"%s\"\n", file->buf);
+ }
+
+ escape(msg->id, file->buf, file->len);
+ fprintf(file->out, "msgid \"%s\"\n", file->buf);
+
+ if (msg->plural_len) {
+ escape(msg->plural, file->buf, file->len);
+ fprintf(file->out, "msgid_plural \"%s\"\n", file->buf);
+ }
+
+ if (msg->plural_len) {
+ for (i=0; i < MAX_NPLURALS && msg->strlen[i]; i++) {
+ escape(msg->str[i], file->buf, file->len);
+ fprintf(file->out, "msgstr[%d] \"%s\"\n", i, file->buf);
+ }
} else {
- fprintf(file->out, "msgstr \"%s\"\n", convbuf);
+ escape(msg->str[0], file->buf, file->len);
+ fprintf(file->out, "msgstr \"%s\"\n", file->buf);
}
+
break;
}
return 0;
@@ -74,13 +95,35 @@ int process_line_callback(struct po_info* info, void* user) {
int process(struct fiLes *files, int update, int backup) {
(void) update; (void) backup;
+ enum po_error t;
struct po_parser pb, *p = &pb;
char line[4096], conv[8192], *lb;
+
+ files->stage = ps_size;
poparser_init(p, conv, sizeof(conv), process_line_callback, files);
while((lb = fgets(line, sizeof(line), files->po))) {
- poparser_feed_line(p, lb, sizeof(line));
+ if ((t = poparser_feed_line(p, lb, strlen(line))) != po_success)
+ return t;
+ }
+ if ((t = poparser_finish(p)) != po_success)
+ free(files->buf);
+
+ files->stage = ps_parse;
+ files->buf = (char*)malloc(files->len);
+ fseek(files->po, 0, SEEK_SET);
+
+ while((lb = fgets(line, sizeof(line), files->po))) {
+ if ((t = poparser_feed_line(p, lb, strlen(line))) != po_success) {
+ free(files->buf);
+ return t;
+ }
}
- poparser_finish(p);
+ if ((t = poparser_finish(p)) != po_success) {
+ free(files->buf);
+ return t;
+ }
+
+ free(files->buf);
return 0;
}
diff --git a/src/poparser.c b/src/poparser.c
index faf7e27..dc4cf86 100644
--- a/src/poparser.c
+++ b/src/poparser.c
@@ -1,218 +1,381 @@
#include <ctype.h>
-#include <assert.h>
#include <stdlib.h>
#include <string.h>
#include <iconv.h>
#include "poparser.h"
#include "StringEscape.h"
-#define streq(A, B) (!strcmp(A, B))
#define strstarts(S, W) (memcmp(S, W, sizeof(W) - 1) ? NULL : (S + (sizeof(W) - 1)))
-static unsigned fuzzymark = 0;
-static enum po_entry get_type_and_start(struct po_info *info, char* lp, char* end, size_t *stringstart) {
- enum po_entry result_type;
- char *x, *y;
- size_t start = (size_t) lp;
- while(isspace(*lp) && lp < end) lp++;
- if(lp[0] == '#') {
- char *s;
- if(s = strstr(lp, ", fuzzy")) {
- if(fuzzymark != 0) fuzzymark++;
- else fuzzymark=2;
- }
- inv:
- *stringstart = 0;
- return pe_invalid;
- } else if((y = strstarts(lp, "msg"))) {
- if((x = strstarts(y, "id")) && isspace(*x))
- result_type = pe_msgid;
- else if ((x = strstarts(y, "id_plural")) && isspace(*x))
- result_type = pe_plural;
- else if ((x = strstarts(y, "ctxt")) && isspace(*x))
- result_type = pe_ctxt;
- else if ((x = strstarts(y, "str")) && (isspace(*x) ||
- (x[0] == '[' && (x[1]-'0') < info->nplurals && x[2] == ']' && (x += 3) && isspace(*x))))
- result_type = pe_msgstr;
- else
- goto inv;
- while(isspace(*x) && x < end) x++;
- if(*x != '"') abort();
- conv:
- *stringstart = ((size_t) x - start) + 1;
- } else if(lp[0] == '"') {
- if(!(*info->charset)) {
- if(x = strstr(lp, "charset=")) {
- // charset=xxx\\n
- int len = strlen(x+=8) - 4;
- assert(len <= 11);
- if(strncmp(x, "UTF-8", 5) && strncmp(x, "utf-8", 5)) {
- memcpy(info->charset, x, len);
- info->charset[len] = 0;
- }
- }
- }
- if(x = strstr(lp, "nplurals="))
- if(*(x+9) - '0')
- info->nplurals = *(x+9) - '0';
- result_type = pe_str;
- x = lp;
- goto conv;
- } else {
- goto inv;
- }
- return result_type;
-}
-
-/* expects a pointer to the first char after a opening " in a string,
- * converts the string into convbuf, and returns the length of that string */
-static size_t get_length_and_convert(struct po_info *info, char* x, char* end, char* convbuf, size_t convbuflen) {
- size_t result = 0;
- char* e = x + strlen(x);
- assert(e > x && e < end && *e == 0);
- e--;
- while(isspace(*e)) e--;
- if(*e != '"') abort();
- *e = 0;
- char *s;
- if(*info->charset) {
- iconv_t ret = iconv_open("UTF-8", info->charset);
- if(ret != (iconv_t)-1) {
- size_t a=end-x, b=a*4;
- char mid[b], *midp=mid;
- iconv(iconv_open("UTF-8", info->charset), &x, &a, &midp, &b);
- if(s = strstr(mid, "charset="))
- memcpy(s+8, "UTF-8\\n\0", 8);
- result = unescape(mid, convbuf, convbuflen);
- // iconv doesnt recognize the encoding
- } else result = unescape(x, convbuf, convbuflen);
- } else result = unescape(x, convbuf, convbuflen);
- return result;
-}
+static const char* sysdep_str[st_max]={
+ [st_priu32] = "<PRIu32>",
+ [st_priu64] = "<PRIu64>",
+ [st_priumax] = "<PRIuMAX>",
+};
+static const char* sysdep_repl[st_max][3]={
+ [st_priu32] = {"\x2", "u", "lu"},
+ [st_priu64] = {"\x2", "lu", "llu"},
+ [st_priumax] = {"\x1", "ju"},
+};
void poparser_init(struct po_parser *p, char* workbuf, size_t bufsize, poparser_callback cb, void* cbdata) {
+ int cnt;
+ memset(p, 0, sizeof(struct po_parser));
p->buf = workbuf;
p->bufsize = bufsize;
p->cb = cb;
- p->prev_type = pe_invalid;
- p->prev_rtype = pe_invalid;
- p->curr_len = 0;
p->cbdata = cbdata;
- *(p->info.charset) = 0;
- // nplurals = 2 by default
- p->info.nplurals = 2;
- fuzzymark = 0;
+ p->hdr.nplurals = MAX_NPLURALS;
+ p->max_ctxt_len = 1;
+ p->max_id_len = 1;
+ p->max_plural_len = 1;
+ for (cnt = 0; cnt < MAX_NPLURALS; cnt++)
+ p->max_strlen[cnt] = 1;
+ p->strcnt = 0;
+ p->first = true;
}
-enum lineactions {
- la_incr,
- la_proc,
- la_abort,
- la_nop,
- la_max,
-};
+static inline enum po_error poparser_feed_hdr(struct po_parser *p, po_message_t msg) {
+ char *x, *y;
+ if (p->stage == ps_parse && p->first) {
+ if (msg->id_len)
+ return -po_invalid_entry;
+
+ if ((x = strstr(msg->str[0], "charset="))) {
+ for (y = x; *y && !isspace(*y); y++);
+ memcpy(p->hdr.charset, x+8, y-x-8);
+ p->hdr.charset[y-x] = 0;
+
+ p->cd = iconv_open("UTF-8", p->hdr.charset);
+ if (p->cd == (iconv_t)-1) {
+ p->cd = 0;
+ return -po_unsupported_charset;
+ }
+ }
+
+ if ((x = strstr(msg->str[0], "nplurals="))) {
+ p->hdr.nplurals = *(x+9) - '0';
+ }
+
+ p->first = false;
+ }
+
+ return po_success;
+}
+
+static inline enum po_error poparser_clean(struct po_parser *p, po_message_t msg) {
+ enum po_error t;
+ int i;
+
+ if (p->strcnt) {
+ msg->strlen[p->strcnt] = 0;
+
+ if ((t = poparser_feed_hdr(p, msg)) != po_success) {
+ return t;
+ }
+
+ // met a new block starting with msgid
+ if (p->cb)
+ p->cb(msg, p->cbdata);
-/* return 0 on success */
-int poparser_feed_line(struct po_parser *p, char* line, size_t buflen) {
- char *convbuf = p->buf;
- size_t convbuflen = p->bufsize;
- size_t strstart;
-
- static const enum lineactions action_tbl[pe_max][pe_max] = {
- // pe_str will never be set as curr_type
- [pe_str] = {
- [pe_str] = la_abort,
- [pe_msgid] = la_abort,
- [pe_ctxt] = la_abort,
- [pe_plural] = la_abort,
- [pe_msgstr] = la_abort,
- [pe_invalid] = la_abort,
- },
- [pe_msgid] = {
- [pe_str] = la_incr,
- [pe_msgid] = la_abort,
- [pe_ctxt] = la_abort,
- [pe_plural] = la_proc,
- [pe_msgstr] = la_proc,
- [pe_invalid] = la_proc,
- },
- [pe_ctxt] = {
- [pe_str] = la_incr,
- [pe_msgid] = la_proc,
- [pe_ctxt] = la_abort,
- [pe_plural] = la_abort,
- [pe_msgstr] = la_abort,
- [pe_invalid] = la_proc,
- },
- [pe_plural] = {
- [pe_str] = la_incr,
- [pe_msgid] = la_abort,
- [pe_ctxt] = la_abort,
- [pe_plural] = la_abort,
- [pe_msgstr] = la_proc,
- [pe_invalid] = la_proc,
- },
- [pe_msgstr] = {
- [pe_str] = la_incr,
- [pe_msgid] = la_proc,
- [pe_ctxt] = la_proc,
- [pe_plural] = la_abort,
- [pe_msgstr] = la_proc,
- [pe_invalid] = la_proc,
- },
- [pe_invalid] = {
- [pe_str] = la_nop,
- [pe_msgid] = la_incr,
- [pe_ctxt] = la_incr,
- [pe_plural] = la_nop,
- [pe_msgstr] = la_nop,
- [pe_invalid] = la_nop,
- },
- };
-
- enum po_entry type;
-
- type = get_type_and_start(&p->info, line, line + buflen, &strstart);
- if(p->prev_rtype != pe_invalid && action_tbl[p->prev_rtype][type] == la_abort)
- abort();
- if(type != pe_invalid && type != pe_str)
- p->prev_rtype = type;
- if(fuzzymark) {
- if(type == pe_ctxt && fuzzymark == 1) fuzzymark--;
- if(type == pe_msgid) fuzzymark--;
- if(fuzzymark > 0) return 0;
+ for (i=0; i < st_max; i++)
+ msg->sysdep[i] = 0;
+
+ msg->ctxt_len = 0;
+ msg->id_len = 0;
+ msg->plural_len = 0;
+ p->strcnt = 0;
}
- switch(action_tbl[p->prev_type][type]) {
- case la_incr:
- assert(type == pe_msgid || type == pe_msgstr || type == pe_str || type == pe_plural || pe_ctxt);
- p->curr_len += get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf + p->curr_len, convbuflen - p->curr_len);
+
+ return po_success;
+}
+
+enum po_error poparser_feed_line(struct po_parser *p, char* in, size_t in_len) {
+ char *line = in;
+ size_t line_len = in_len;
+ po_message_t msg = &p->msg;
+ int cnt = 0;
+ enum po_error t;
+ size_t len;
+ char *x, *y, *z;
+
+ if (line_len == 0 || line[0] == '\n') {
+ // ignore blank lines
+ return po_success;
+ } else if (line[0] == '#') {
+ switch (line[1]) {
+ case ',':
+ x = &line[2];
+ while (*x && (y = strpbrk(x, " ,\n"))) {
+ if (y != x && !memcmp(x, "fuzzy", y-x)) {
+ msg->flags |= PO_FUZZY;
+ }
+ x = y + strspn(y, " ,\n");
+ }
break;
- case la_proc:
- assert(p->prev_type == pe_msgid || p->prev_type == pe_msgstr || p->prev_type == pe_plural || p->prev_type == pe_ctxt);
- p->info.text = convbuf;
- p->info.textlen = p->curr_len;
- p->info.type = p->prev_type;
- p->cb(&p->info, p->cbdata);
- if(type != pe_invalid)
- p->curr_len = get_length_and_convert(&p->info, line + strstart, line + buflen, convbuf, convbuflen);
- else
- p->curr_len = 0;
+ case '.':
+ // extracted comments for translators, ignore
+ case ':':
+ // reference comments for translators, ignore
+ case '|':
+ // previous untranslated strings for translators, ignore
+ default:
+ // ignore normal comments
+ return po_success;
+ }
+ } else if (line[0] == '"') {
+ if ( (y = strrchr(x = &line[1], '"')) == NULL)
+ return -po_excepted_token;
+
+ len = y - x;
+ *y = 0;
+
+ if (p->cd) {
+ line = x;
+ line_len = len + 1;
+ x = p->buf;
+ len = p->bufsize;
+ if ( (len = iconv(p->cd, &line, &line_len, &x, &len)) == -1)
+ return -po_failed_iconv;
+
+ x = p->buf;
+ }
+
+ for (cnt = 0; cnt < st_max; cnt++) {
+ if (strstr(x, sysdep_str[cnt])) {
+ msg->sysdep[cnt] = sysdep_repl[cnt][0][0];
+ }
+ }
+
+ switch (p->previous) {
+ case po_str:
+ cnt = p->strcnt - 1;
+ if (p->stage == ps_parse) {
+ len = unescape(x, &msg->str[cnt][msg->strlen[cnt]], p->max_strlen[cnt]);
+ }
+
+ msg->strlen[cnt] += len;
+ break;
+ case po_plural:
+ if (p->stage == ps_parse) {
+ len = unescape(x, &msg->plural[msg->plural_len], p->max_plural_len);
+ }
+
+ msg->plural_len += len;
break;
- case la_nop:
+ case po_id:
+ if (p->stage == ps_parse) {
+ len = unescape(x, &msg->id[msg->id_len], p->max_id_len);
+ }
+
+ msg->id_len += len;
+ break;
+ case po_ctxt:
+ if (p->stage == ps_parse) {
+ len = unescape(x, &msg->ctxt[msg->ctxt_len], p->max_ctxt_len);
+ }
+
+ msg->ctxt_len += len;
break;
- case la_abort:
default:
- abort();
- // todo : return error code
+ return -po_invalid_entry;
+ }
+ } else if ((z = strstarts(line, "msg"))) {
+ if ( (x = strchr(z, '"')) == NULL)
+ return -po_excepted_token;
+
+ if ( (y = strrchr(x+1, '"')) == NULL)
+ return -po_excepted_token;
+
+ len = y - ++x;
+ *y = 0;
+
+ if (p->cd) {
+ line = x;
+ line_len = len + 1;
+ x = p->buf;
+ len = p->bufsize;
+ if ( (len = iconv(p->cd, &line, &line_len, &x, &len)) == -1)
+ return -po_failed_iconv;
+
+ x = p->buf;
+ }
+
+ if ((y = strstarts(z, "ctxt")) && isspace(*y)) {
+ if ( (t = poparser_clean(p, msg)) != po_success)
+ return t;
+
+ if (msg->id_len || msg->plural)
+ return -po_invalid_entry;
+
+ for (cnt = 0; cnt < st_max; cnt++) {
+ if (strstr(x, sysdep_str[cnt])) {
+ msg->sysdep[cnt] = sysdep_repl[cnt][0][0];
+ }
+ }
+
+ if (p->stage == ps_parse) {
+ if (msg->ctxt == NULL) {
+ return -po_internal;
+ }
+
+ len = unescape(x, msg->ctxt, p->max_ctxt_len);
+ }
+
+ msg->ctxt_len = len;
+ p->previous = po_ctxt;
+ } else if ((y = strstarts(z, "id")) && isspace(*y)) {
+ if ( (t = poparser_clean(p, msg)) != po_success)
+ return t;
+
+ if (msg->plural_len)
+ return -po_invalid_entry;
+
+ for (cnt = 0; cnt < st_max; cnt++) {
+ if (strstr(x, sysdep_str[cnt])) {
+ msg->sysdep[cnt] = sysdep_repl[cnt][0][0];
+ }
+ }
+
+ if (p->stage == ps_parse) {
+ if (msg->id == NULL) {
+ return -po_internal;
+ }
+
+ len = unescape(x, msg->id, p->max_id_len);
+ }
+
+ msg->id_len = len;
+ p->previous = po_id;
+ } else if ((y = strstarts(z, "id_plural")) && isspace(*y)) {
+ if (!msg->id_len || p->strcnt)
+ return -po_invalid_entry;
+
+ if (p->stage == ps_parse) {
+ if (msg->plural == NULL) {
+ return -po_internal;
+ }
+
+ len = unescape(x, msg->plural, p->max_plural_len);
+ }
+
+ msg->plural_len = len;
+ p->previous = po_plural;
+ } else if ((y = strstarts(z, "str"))) {
+ if (!msg->id_len && !p->first)
+ return -po_invalid_entry;
+
+ if (isspace(*y)) {
+ if (p->strcnt || msg->plural_len)
+ return -po_invalid_entry;
+
+ cnt = (p->strcnt = 1) - 1;
+ } else if (*y == '[') {
+ if (!msg->plural_len)
+ return -po_invalid_entry;
+
+ if (y[2] != ']' || !isspace(y[3])) return -po_excepted_token;
+
+ p->strcnt = (cnt = y[1] - '0') + 1;
+
+ if (p->strcnt > p->hdr.nplurals) {
+ return -po_plurals_overflow;
+ }
+ } else {
+ return -po_excepted_token;
+ }
+
+ if (p->stage == ps_parse) {
+ if (msg->str[cnt] == NULL) {
+ return -po_internal;
+ }
+
+ len = unescape(x, msg->str[cnt], p->max_strlen[cnt]);
+ }
+
+ msg->strlen[cnt] = len;
+ p->previous = po_str;
+ } else {
+ return -po_invalid_entry;
+ }
}
- if(type != pe_str) {
- p->prev_type = type;
+
+ if (p->stage == ps_size) {
+ if (p->max_strlen[cnt] < msg->strlen[cnt])
+ p->max_strlen[cnt] = msg->strlen[cnt] + 1;
+ if (p->max_plural_len < msg->plural_len)
+ p->max_plural_len = msg->plural_len + 1;
+ if (p->max_id_len < msg->id_len)
+ p->max_id_len = msg->id_len + 1;
+ if (p->max_ctxt_len < msg->ctxt_len)
+ p->max_ctxt_len = msg->ctxt_len + 1;
}
- return 0;
+
+ return po_success;
}
-int poparser_finish(struct po_parser *p) {
- char empty[4] = "";
- return poparser_feed_line(p, empty, sizeof(empty));
+enum po_error poparser_finish(struct po_parser *p) {
+ size_t len;
+ int cnt;
+ enum po_error t;
+ po_message_t msg = &p->msg;
+
+ if (p->stage == ps_size) {
+ if ( (t = poparser_clean(p, msg)) != po_success)
+ return t;
+
+ len = p->max_ctxt_len;
+ len += p->max_id_len;
+ len += p->max_plural_len;
+ for (cnt = 0; cnt < MAX_NPLURALS; cnt++)
+ len += p->max_strlen[cnt];
+
+ memset(msg, 0, sizeof(struct po_message));
+ msg->ctxt = (char*)malloc(len);
+ msg->id = msg->ctxt + p->max_ctxt_len;
+ msg->plural = msg->id + p->max_id_len;
+ msg->str[0] = msg->plural + p->max_plural_len;
+ for (cnt = 1; cnt < MAX_NPLURALS; cnt++)
+ msg->str[cnt] = msg->str[cnt-1] + p->max_strlen[cnt-1];
+
+ p->hdr.nplurals = 2;
+ } else {
+ if ( (t = poparser_clean(p, msg)) != po_success)
+ return t;
+ if (msg->ctxt) free(msg->ctxt);
+ if (p->cd) iconv_close(p->cd);
+ }
+
+ if (p->stage < ps_parse) p->stage++;
+
+ return po_success;
+}
+
+size_t poparser_sysdep(const char *in, char *out, int cnt[]) {
+ const char *x, *y, *outs;
+ int n;
+ outs = out;
+ x = in;
+
+ for (n=0; n < st_max;) {
+ if ((y = strstr(x, sysdep_str[n])) && *(y-1) == '%') {
+ if (outs)
+ memcpy(out, x, y-x);
+ out += y-x;
+ x = y + strlen(sysdep_str[n]);
+
+ y = sysdep_repl[n][cnt[n]+1];
+ if (outs)
+ memcpy(out, y, strlen(y));
+ out += strlen(y);
+
+ n = 0;
+ } else n++;
+ }
+
+ if (outs)
+ memcpy(out, x, strlen(x) + 1);
+ out += strlen(x) + 1;
+ return out - outs;
}
diff --git a/src/poparser.h b/src/poparser.h
index 29b7b16..1e4a589 100644
--- a/src/poparser.h
+++ b/src/poparser.h
@@ -1,41 +1,97 @@
#ifndef POPARSER_H
#define POPARSER_H
-#include <unistd.h>
-enum po_entry {
- pe_msgid = 0,
- pe_plural,
- pe_ctxt,
- pe_msgstr,
- pe_maxstr,
- pe_str = pe_maxstr,
- pe_invalid,
- pe_max,
+#include <iconv.h>
+#include <stdbool.h>
+#include <stdlib.h>
+
+#define MAX_NPLURALS 6
+
+enum sysdep_types {
+ st_priu32 = 0,
+ st_priu64,
+ st_priumax,
+ st_max
};
-struct po_info {
- enum po_entry type;
- char *text;
+// make sure out has equal or more space than in
+size_t poparser_sysdep(const char *in, char *out, int cnt[]);
+
+struct po_header {
char charset[12];
- unsigned int nplurals;
- size_t textlen;
+ unsigned nplurals;
+ // maybe parse the header later
+};
+
+#define PO_FUZZY 1u
+
+struct po_message {
+ char *ctxt;
+ char *id;
+ char *plural;
+ char* str[MAX_NPLURALS];
+
+ int sysdep[st_max];
+ size_t ctxt_len;
+ size_t id_len;
+ size_t plural_len;
+ size_t strlen[MAX_NPLURALS];
+ // h.......1.0
+ // |-------|a|
+ // |.......|a|
+ int flags;
};
+typedef struct po_message *po_message_t;
-typedef int (*poparser_callback)(struct po_info* info, void* user);
+typedef int (*poparser_callback)(po_message_t msg, void* user);
+
+enum po_stage {
+ // collect size of every msg
+ ps_size = 0,
+ // parse
+ ps_parse,
+ ps_max = ps_parse,
+};
+
+enum po_entry {
+ po_ctxt = 0,
+ po_id,
+ po_plural,
+ po_str,
+};
struct po_parser {
- struct po_info info;
+ struct po_header hdr;
+ struct po_message msg;
+ enum po_stage stage;
+
+ // private parts
+ bool first;
+ iconv_t cd;
+ enum po_entry previous;
+ int strcnt;
+ size_t max_ctxt_len;
+ size_t max_id_len;
+ size_t max_plural_len;
+ size_t max_strlen[MAX_NPLURALS];
char *buf;
size_t bufsize;
- enum po_entry prev_type;
- enum po_entry prev_rtype;
- unsigned curr_len;
poparser_callback cb;
void *cbdata;
};
+enum po_error {
+ po_success = 0,
+ po_unsupported_charset,
+ po_failed_iconv,
+ po_excepted_token,
+ po_plurals_overflow,
+ po_invalid_entry,
+ po_internal,
+};
+
void poparser_init(struct po_parser *p, char* workbuf, size_t bufsize, poparser_callback cb, void* cbdata);
-int poparser_feed_line(struct po_parser *p, char* line, size_t buflen);
-int poparser_finish(struct po_parser *p);
+enum po_error poparser_feed_line(struct po_parser *p, char* line, size_t buflen);
+enum po_error poparser_finish(struct po_parser *p);
#endif