summaryrefslogtreecommitdiff
path: root/src/hb-ot-shape-complex-khmer.hh
diff options
context:
space:
mode:
author: Behdad Esfahbod <behdad@behdad.org> 2018-10-01 19:09:58 +0200
committer: Behdad Esfahbod <behdad@behdad.org> 2018-10-01 19:23:47 +0200
commit: 51436547162a18e88144e7125ad6ce4a69a08d4b (patch)
tree: 86dc145119a9b8a58aef672a3354a3bcc89ecdb2 /src/hb-ot-shape-complex-khmer.hh
parent: aaaa65baa7fcfb65ae814528bdd93cc5c4ea540d (diff)
download: harfbuzz-51436547162a18e88144e7125ad6ce4a69a08d4b.tar.gz
[khmer] Rewrite grammar completely
Based on experimenting with Uniscribe to extract grammar and categories.

Failures down from 44 to 35:
KHMER: 299089 out of 299124 tests passed. 35 failed (0.0117008%).

We still don't enforce the one-matra rule pre-decomposition, but we do enforce an order and one-matra-per-position post-decomposition.

https://github.com/harfbuzz/harfbuzz/issues/667
Diffstat (limited to 'src/hb-ot-shape-complex-khmer.hh')
-rw-r--r--src/hb-ot-shape-complex-khmer.hh99
1 files changed, 51 insertions, 48 deletions
diff --git a/src/hb-ot-shape-complex-khmer.hh b/src/hb-ot-shape-complex-khmer.hh
index c86e7aad..4ee0b838 100644
--- a/src/hb-ot-shape-complex-khmer.hh
+++ b/src/hb-ot-shape-complex-khmer.hh
@@ -34,30 +34,22 @@
/* buffer var allocations */
#define khmer_category() indic_category() /* khmer_category_t */
-#define khmer_position() indic_position() /* khmer_position_t */
+#define khmer_position() indic_position() /* indic_position_t */
-typedef indic_category_t khmer_category_t;
-typedef indic_position_t khmer_position_t;
-
-
-static inline khmer_position_t
-matra_position_khmer (khmer_position_t side)
+/* Note: This enum is duplicated in the -machine.rl source file.
+ * Not sure how to avoid duplication. */
+enum khmer_category_t
{
- switch ((int) side)
- {
- case POS_PRE_C:
- return POS_PRE_M;
+ OT_Robatic = 20,
+ OT_Xgroup = 21,
+ OT_Ygroup = 22,
- case POS_POST_C:
- case POS_ABOVE_C:
- case POS_BELOW_C:
- return POS_AFTER_POST;
-
- default:
- return side;
- };
-}
+ OT_VAbv = 26,
+ OT_VBlw = 27,
+ OT_VPre = 28,
+ OT_VPst = 29,
+};
static inline void
set_khmer_properties (hb_glyph_info_t &info)
@@ -65,47 +57,58 @@ set_khmer_properties (hb_glyph_info_t &info)
hb_codepoint_t u = info.codepoint;
unsigned int type = hb_indic_get_categories (u);
khmer_category_t cat = (khmer_category_t) (type & 0x7Fu);
- khmer_position_t pos = (khmer_position_t) (type >> 8);
+ indic_position_t pos = (indic_position_t) (type >> 8);
/*
* Re-assign category
+ *
+ * These categories are experimentally extracted from what Uniscribe allows.
*/
-
- if (unlikely (u == 0x17C6u)) cat = OT_N; /* Khmer Bindu doesn't like to be repositioned. */
- else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x17CDu, 0x17D1u) ||
- u == 0x17CBu || u == 0x17D3u || u == 0x17DDu)) /* Khmer Various signs */
+ switch (u)
{
- /* These can occur mid-syllable (eg. before matras), even though Unicode marks them as Syllable_Modifier.
- * https://github.com/roozbehp/unicode-data/issues/5 */
- cat = OT_M;
- pos = POS_ABOVE_C;
+ case 0x179Au:
+ cat = (khmer_category_t) OT_Ra;
+ break;
+
+ case 0x17CCu:
+ case 0x17C9u:
+ case 0x17CAu:
+ cat = OT_Robatic;
+ break;
+
+ case 0x17C6u:
+ case 0x17CBu:
+ case 0x17CDu:
+ case 0x17CEu:
+ case 0x17CFu:
+ case 0x17D0u:
+ case 0x17D1u:
+ cat = OT_Xgroup;
+ break;
+
+ case 0x17C7u:
+ case 0x17C8u:
+ case 0x17DDu:
+ case 0x17D3u: /* Just guessing. Uniscribe doesn't categorize it. */
+ cat = OT_Ygroup;
+ break;
}
- else if (unlikely (hb_in_range<hb_codepoint_t> (u, 0x2010u, 0x2011u))) cat = OT_PLACEHOLDER;
- else if (unlikely (u == 0x25CCu)) cat = OT_DOTTEDCIRCLE;
-
/*
* Re-assign position.
*/
-
- if ((FLAG_UNSAFE (cat) & CONSONANT_FLAGS))
- {
- pos = POS_BASE_C;
- if (u == 0x179Au)
- cat = OT_Ra;
- }
- else if (cat == OT_M)
- {
- pos = matra_position_khmer (pos);
- }
- else if ((FLAG_UNSAFE (cat) & (FLAG (OT_SM) | FLAG (OT_A) | FLAG (OT_Symbol))))
- {
- pos = POS_SMVD;
- }
+ if (cat == (khmer_category_t) OT_M)
+ switch ((int) pos)
+ {
+ case POS_PRE_C: cat = OT_VPre; break;
+ case POS_BELOW_C: cat = OT_VBlw; break;
+ case POS_ABOVE_C: cat = OT_VAbv; break;
+ case POS_POST_C: cat = OT_VPst; break;
+ default: assert (0);
+ };
info.khmer_category() = cat;
- info.khmer_position() = pos;
}