summary refs log tree commit diff
path: root/mkworddic/calcfreq.c
diff options
context:
space:
mode:
Diffstat (limited to 'mkworddic/calcfreq.c')
-rw-r--r-- mkworddic/calcfreq.c 77
1 files changed, 77 insertions, 0 deletions
diff --git a/mkworddic/calcfreq.c b/mkworddic/calcfreq.c
new file mode 100644
index 0000000..fb4c730
--- /dev/null
+++ b/mkworddic/calcfreq.c
@@ -0,0 +1,77 @@
+/* Compute word frequencies. */
+#include <stdlib.h>
+#include "mkdic.h"
+
+/*
+ * Return the total number of word entries in the list: the sum of
+ * nr_entries over every yomi_entry reachable from yl->head via ->next.
+ */
+static int
+count_nr_words(struct yomi_entry_list *yl)
+{
+  struct yomi_entry *cur = yl->head;
+  int total = 0;
+  while (cur) {
+    total += cur->nr_entries;
+    cur = cur->next;
+  }
+  return total;
+}
+
+/*
+ * Build a flat array holding a pointer to every word entry in the list.
+ *
+ * yl: list whose yomi entries are walked from yl->head via ->next.
+ * nr: number of slots to allocate; calc_freq() passes count_nr_words(yl).
+ *
+ * Returns a malloc()ed array that the caller must free(), or NULL when
+ * nr is negative or the allocation fails (malloc(0) may also yield NULL
+ * for nr == 0).
+ */
+static struct word_entry **
+make_word_array(struct yomi_entry_list *yl, int nr)
+{
+  struct word_entry **array;
+  struct yomi_entry *ye;
+  int nth = 0;
+  if (nr < 0) {
+    /* A negative count would convert to an enormous size_t. */
+    return NULL;
+  }
+  array = malloc(sizeof *array * (size_t)nr);
+  if (!array) {
+    /* Filling the array below would dereference NULL. */
+    return NULL;
+  }
+  for (ye = yl->head; ye; ye = ye->next) {
+    int i;
+    for (i = 0; i < ye->nr_entries; i++) {
+      array[nth] = &ye->entries[i];
+      nth++;
+    }
+  }
+  return array;
+}
+
+/**
+ * Comparison callback for qsort() over an array of struct word_entry
+ * pointers: orders entries by descending absolute value of raw_freq.
+ */
+static int
+compare_word_entry_by_freq(const void *p1, const void *p2)
+{
+  const struct word_entry *lhs = *(const struct word_entry * const *)p1;
+  const struct word_entry *rhs = *(const struct word_entry * const *)p2;
+  int lhs_mag = abs(lhs->raw_freq);
+  int rhs_mag = abs(rhs->raw_freq);
+  return rhs_mag - lhs_mag;
+}
+
+/*
+ * Assign the final freq field of each entry from its position in array.
+ *
+ * array: word entry pointers; calc_freq() sorts them by descending
+ *        |raw_freq| before calling this function.
+ * nr:    number of elements in array.
+ *
+ * The entry at index i gets a base rank of 99 - i / percent, clamped to
+ * a minimum of 1, where percent is one hundredth of nr.  The rank is
+ * scaled by 100, reduced by the entry's source_order, and finally
+ * negated when raw_freq is negative.
+ */
+static void
+set_freq(struct word_entry **array, int nr)
+{
+  int i;
+  int percent = nr / 100;
+  if (percent < 1) {
+    /* Fewer than 100 entries: avoid dividing by zero below. */
+    percent = 1;
+  }
+  for (i = 0; i < nr; i++) {
+    struct word_entry *we = array[i];
+    we->freq = 99 - (i / percent);
+    if (we->freq < 1) {
+      we->freq = 1;
+    }
+    we->freq *= 100;
+    we->freq -= we->source_order;
+    if (we->raw_freq < 0) {
+      /* Keep the sign of the raw frequency on the computed value. */
+      we->freq *= -1;
+    }
+  }
+}
+
+/*
+ * Assign a freq value to every word entry reachable from yl.
+ *
+ * All entries are gathered into a temporary pointer array, sorted by
+ * descending |raw_freq| and handed to set_freq(); the array is released
+ * before returning.  An empty list is left untouched.  If the temporary
+ * array cannot be allocated the program is aborted, because this
+ * function has no way to report failure to its caller.
+ */
+void
+calc_freq(struct yomi_entry_list *yl)
+{
+  struct word_entry **we;
+  int nr = count_nr_words(yl);
+  if (nr <= 0) {
+    /* Nothing to rank; also avoids a zero-sized allocation. */
+    return;
+  }
+  we = make_word_array(yl, nr);
+  if (!we) {
+    abort();
+  }
+  qsort(we, (size_t)nr, sizeof *we, compare_word_entry_by_freq);
+  set_freq(we, nr);
+  free(we);
+}
+