summaryrefslogtreecommitdiff
path: root/src/storage/phrase_index.cpp
diff options
context:
space:
mode:
author Matias Larsson <matias.larsson@matthews.se> 2022-11-24 18:22:55 +0100
committer Peng Wu <alexepico@gmail.com> 2022-11-29 16:16:21 +0800
commit 8bcea4710bd328ef74dc852d9fdffb3c47ed8abe (patch)
tree 88a92418b2408d31aeb5ae21d04a5ae1c4ce4a8d /src/storage/phrase_index.cpp
parent 521340f02907dae96f6f13dbf7997a34ebe864f2 (diff)
download libpinyin-8bcea4710bd328ef74dc852d9fdffb3c47ed8abe.tar.gz
Fix libpinyin crash on ARMv7
Fix the alignment trap in get_unigram_frequency(). Also fix other places where the same trap could happen (depending on the compiler and the surrounding code). The trap occurred when GCC for ARM generated a SIMD instruction (specifically VLDR) to load 32 bits in a single instruction while the memory address was not aligned to 32 bits; VLDR traps if the address is not aligned. GCC generated that instruction because the address was cast to a uint32 pointer and then dereferenced. The fix is to allocate a uint32 variable on the stack and use memcpy to copy the data into that variable, which ensures that instructions appropriate for a possibly unaligned address are generated. **Links** About the issue with GCC: https://trust-in-soft.com/blog/2020/04/06/gcc-always-assumes-aligned-pointer-accesses/ How Linux does it: https://elixir.bootlin.com/linux/v5.10.155/source/include/linux/unaligned/memmove.h#L13 ARM documentation: https://documentation-service.arm.com/static/5f8dc043f86e16515cdbbc92?token= (see 'A3.2.1 Unaligned data access')
Diffstat (limited to 'src/storage/phrase_index.cpp')
-rw-r--r--src/storage/phrase_index.cpp23
1 files changed, 15 insertions, 8 deletions
diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp
index e9e9d5d..de864c2 100644
--- a/src/storage/phrase_index.cpp
+++ b/src/storage/phrase_index.cpp
@@ -20,6 +20,7 @@
#include "phrase_index.h"
#include "pinyin_custom2.h"
+#include "unaligned_memory.h"
namespace pinyin{
@@ -61,10 +62,12 @@ bool PhraseItem::add_pronunciation(ChewingKey * keys, guint32 delta){
for (int i = 0; i < npron; ++i) {
char * chewing_begin = buf_begin + offset +
i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32));
- guint32 * freq = (guint32 *)(chewing_begin +
- phrase_length * sizeof(ChewingKey));
+
+ guint32 * pfreq = (guint32 *)(chewing_begin +
+ phrase_length * sizeof(ChewingKey));
+ guint32 freq = UnalignedMemory<guint32>::load(pfreq);
- total_freq += *freq;
+ total_freq += freq;
if (0 == pinyin_exact_compare2
(keys, (ChewingKey *)chewing_begin, phrase_length)) {
@@ -74,8 +77,9 @@ bool PhraseItem::add_pronunciation(ChewingKey * keys, guint32 delta){
if (delta > 0 && total_freq > total_freq + delta)
return false;
- *freq += delta;
+ freq += delta;
total_freq += delta;
+ UnalignedMemory<guint32>::store(freq, pfreq);
return true;
}
}
@@ -117,9 +121,11 @@ void PhraseItem::increase_pronunciation_possibility(ChewingKey * keys,
for (int i = 0; i < npron; ++i) {
char * chewing_begin = buf_begin + offset +
i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32));
- guint32 * freq = (guint32 *)(chewing_begin +
- phrase_length * sizeof(ChewingKey));
- total_freq += *freq;
+
+ guint32 * pfreq = (guint32 *)(chewing_begin +
+ phrase_length * sizeof(ChewingKey));
+ guint32 freq = UnalignedMemory<guint32>::load(pfreq);
+ total_freq += freq;
if (0 == pinyin_compare_with_tones(keys, (ChewingKey *)chewing_begin,
phrase_length)) {
@@ -128,8 +134,9 @@ void PhraseItem::increase_pronunciation_possibility(ChewingKey * keys,
if (delta > 0 && total_freq > total_freq + delta)
return;
- *freq += delta;
+ freq += delta;
total_freq += delta;
+ UnalignedMemory<guint32>::store(freq, pfreq);
}
}
}