summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- src/include/memory_chunk.h 34
-rw-r--r-- src/include/unaligned_memory.h 43
-rw-r--r-- src/storage/chewing_large_table.cpp 2
-rw-r--r-- src/storage/ngram.cpp 6
-rw-r--r-- src/storage/phrase_index.cpp 23
-rw-r--r-- src/storage/phrase_index.h 13
-rw-r--r-- src/storage/phrase_large_table2.cpp 3
7 files changed, 104 insertions, 20 deletions
diff --git a/src/include/memory_chunk.h b/src/include/memory_chunk.h
index c106dba..baa5679 100644
--- a/src/include/memory_chunk.h
+++ b/src/include/memory_chunk.h
@@ -290,6 +290,25 @@ public:
}
/**
+ * MemoryChunk::set_content:
+ * @offset: the byte offset in this MemoryChunk at which to write.
+ * @data: the data to be copied; it is taken by value and sizeof(data)
+ * bytes of it are written.
+ * @returns: whether the data is copied successfully (this implementation
+ * always returns true).
+ *
+ * Data are written directly to the memory area in this MemoryChunk.
+ * The copy is done with memmove(), i.e. byte by byte rather than through
+ * a T pointer. Afterwards the end of the chunk is set to
+ * begin + max(size(), offset + sizeof(data)).
+ *
+ * NOTE(review): the new size is computed before ensure_has_space() is
+ * called, presumably because that call may change what size() reports;
+ * confirm against the definition of ensure_has_space().
+ *
+ */
+ template <typename T>
+ bool set_content(size_t offset, T data){
+ const size_t len = sizeof(data);
+ size_t cursize = std_lite::max(size(), offset + len); /* size after the write, never less than the current size() */
+ ensure_has_space(offset + len); /* NOTE(review): presumably grows the buffer when needed; definition not visible here */
+ memmove(m_data_begin + offset, &data, len);
+ m_data_end = m_data_begin + cursize;
+ return true;
+ }
+
+ /**
* MemoryChunk::append_content:
* @data: the begin of the data to be copied.
* @len: the length of the data to be copied.
@@ -357,6 +376,21 @@ public:
}
/**
+ * MemoryChunk::get_content:
+ * @offset: the byte offset in this MemoryChunk to read from.
+ * @returns: the content, i.e. a T built from the sizeof(T) bytes that
+ * start at @offset.
+ *
+ * Get the content in this MemoryChunk.
+ * The bytes are copied with memcpy() into a local T instead of being read
+ * through a T pointer. No bounds check is performed here, so the caller
+ * has to make sure that offset + sizeof(T) lies inside the chunk.
+ *
+ */
+ template <typename T>
+ T get_content(size_t offset) const {
+ T value;
+ memcpy(&value, m_data_begin + offset, sizeof(value));
+ return value;
+ }
+
+ /**
* MemoryChunk::compact_memory:
*
* Compact memory, reduce the size.
diff --git a/src/include/unaligned_memory.h b/src/include/unaligned_memory.h
new file mode 100644
index 0000000..27b2f19
--- /dev/null
+++ b/src/include/unaligned_memory.h
@@ -0,0 +1,43 @@
+#ifndef UNALIGNED_MEMORY_H
+#define UNALIGNED_MEMORY_H
+
+#include <cstring>
+
+/**
+ * UnalignedMemory: Safe unaligned memory access.
+ *
+ * Some instruction sets, or some instructions in some instruction sets
+ * require that memory access is aligned to a specific boundary. These
+ * instructions may trap on unaligned access.
+ *
+ * This class provides methods to load and store values at unaligned
+ * addresses. It ensures that the compiler doesn't generate instructions
+ * that could trap on the unaligned memory access.
+ *
+ * Both methods are static and copy exactly sizeof(T) bytes with memcpy(),
+ * so the class is used as UnalignedMemory<T>::load(ptr) and
+ * UnalignedMemory<T>::store(value, ptr) without creating an instance.
+ *
+ * NOTE(review): a byte-wise copy presumably requires T to be a plain,
+ * trivially copyable type such as guint32; nothing here enforces that.
+ */
+
+namespace pinyin{
+ template <typename T>
+ class UnalignedMemory{
+ public:
+ /**
+ * Read a value from a possibly unaligned memory address.
+ *
+ * @src: the address to read sizeof(T) bytes from.
+ * @returns: a T holding a copy of those bytes.
+ *
+ * NOTE(review): memcpy is called unqualified although only <cstring>
+ * is included; std::memcpy is the name that header guarantees.
+ *
+ */
+ static T load(const void * src) {
+ T value;
+ memcpy(&value, src, sizeof(T));
+ return value;
+ }
+
+ /**
+ * Store a value into a possibly unaligned memory address.
+ *
+ * @value: the value whose sizeof(T) bytes are written.
+ * @dest: the address to write to; the caller must provide at least
+ * sizeof(T) writable bytes there, as no check is made here.
+ *
+ */
+ static void store(T value, void * dest) {
+ memcpy(dest, &value, sizeof(T));
+ }
+ };
+}; /* end of namespace pinyin; NOTE(review): the ';' after the brace is redundant */
+
+
+#endif
diff --git a/src/storage/chewing_large_table.cpp b/src/storage/chewing_large_table.cpp
index e856caf..bd76e9b 100644
--- a/src/storage/chewing_large_table.cpp
+++ b/src/storage/chewing_large_table.cpp
@@ -804,7 +804,7 @@ bool ChewingBitmapIndexLevel::store(MemoryChunk * new_chunk,
bool ChewingLengthIndexLevel::load(MemoryChunk * chunk, table_offset_t offset,
table_offset_t end) {
char * begin = (char *) chunk->begin();
- guint32 nindex = *((guint32 *)(begin + offset)); /* number of index */
+ guint32 nindex = chunk->get_content<guint32>(offset); /* number of index */
table_offset_t * index = (table_offset_t *)
(begin + offset + sizeof(guint32));
diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp
index c952768..e4bfe8f 100644
--- a/src/storage/ngram.cpp
+++ b/src/storage/ngram.cpp
@@ -46,14 +46,12 @@ SingleGram::SingleGram(void * buffer, size_t length, bool copy){
}
bool SingleGram::get_total_freq(guint32 & total) const{
- char * buf_begin = (char *)m_chunk.begin();
- total = *((guint32 *)buf_begin);
+ total = m_chunk.get_content<guint32>(0);
return true;
}
bool SingleGram::set_total_freq(guint32 total){
- char * buf_begin = (char *)m_chunk.begin();
- *((guint32 *)buf_begin) = total;
+ m_chunk.set_content<guint32>(0, total);
return true;
}
diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp
index e9e9d5d..de864c2 100644
--- a/src/storage/phrase_index.cpp
+++ b/src/storage/phrase_index.cpp
@@ -20,6 +20,7 @@
#include "phrase_index.h"
#include "pinyin_custom2.h"
+#include "unaligned_memory.h"
namespace pinyin{
@@ -61,10 +62,12 @@ bool PhraseItem::add_pronunciation(ChewingKey * keys, guint32 delta){
for (int i = 0; i < npron; ++i) {
char * chewing_begin = buf_begin + offset +
i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32));
- guint32 * freq = (guint32 *)(chewing_begin +
- phrase_length * sizeof(ChewingKey));
+
+ guint32 * pfreq = (guint32 *)(chewing_begin +
+ phrase_length * sizeof(ChewingKey));
+ guint32 freq = UnalignedMemory<guint32>::load(pfreq);
- total_freq += *freq;
+ total_freq += freq;
if (0 == pinyin_exact_compare2
(keys, (ChewingKey *)chewing_begin, phrase_length)) {
@@ -74,8 +77,9 @@ bool PhraseItem::add_pronunciation(ChewingKey * keys, guint32 delta){
if (delta > 0 && total_freq > total_freq + delta)
return false;
- *freq += delta;
+ freq += delta;
total_freq += delta;
+ UnalignedMemory<guint32>::store(freq, pfreq);
return true;
}
}
@@ -117,9 +121,11 @@ void PhraseItem::increase_pronunciation_possibility(ChewingKey * keys,
for (int i = 0; i < npron; ++i) {
char * chewing_begin = buf_begin + offset +
i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32));
- guint32 * freq = (guint32 *)(chewing_begin +
- phrase_length * sizeof(ChewingKey));
- total_freq += *freq;
+
+ guint32 * pfreq = (guint32 *)(chewing_begin +
+ phrase_length * sizeof(ChewingKey));
+ guint32 freq = UnalignedMemory<guint32>::load(pfreq);
+ total_freq += freq;
if (0 == pinyin_compare_with_tones(keys, (ChewingKey *)chewing_begin,
phrase_length)) {
@@ -128,8 +134,9 @@ void PhraseItem::increase_pronunciation_possibility(ChewingKey * keys,
if (delta > 0 && total_freq > total_freq + delta)
return;
- *freq += delta;
+ freq += delta;
total_freq += delta;
+ UnalignedMemory<guint32>::store(freq, pfreq);
}
}
}
diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h
index 83dfb51..7e910ed 100644
--- a/src/storage/phrase_index.h
+++ b/src/storage/phrase_index.h
@@ -31,6 +31,7 @@
#include "memory_chunk.h"
#include "phrase_index_logger.h"
#include "table_info.h"
+#include "unaligned_memory.h"
/**
* Phrase Index File Format
@@ -121,8 +122,7 @@ public:
*
*/
guint32 get_unigram_frequency(){
- char * buf_begin = (char *)m_chunk.begin();
- return (*(guint32 *)(buf_begin + sizeof(guint8) + sizeof(guint8)));
+ return m_chunk.get_content<guint32>(sizeof(guint8) + sizeof(guint8));
}
/**
@@ -142,12 +142,13 @@ public:
for ( int i = 0 ; i < npron ; ++i){
char * chewing_begin = buf_begin + offset +
i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32));
- guint32 * freq = (guint32 *)(chewing_begin +
- phrase_length * sizeof(ChewingKey));
- total_freq += *freq;
+
+ guint32 freq = UnalignedMemory<guint32>::load(chewing_begin +
+ phrase_length * sizeof(ChewingKey));
+ total_freq += freq;
if ( 0 == pinyin_compare_with_tones(keys, (ChewingKey *)chewing_begin,
phrase_length) ){
- matched += *freq;
+ matched += freq;
}
}
diff --git a/src/storage/phrase_large_table2.cpp b/src/storage/phrase_large_table2.cpp
index b72917c..28eb313 100644
--- a/src/storage/phrase_large_table2.cpp
+++ b/src/storage/phrase_large_table2.cpp
@@ -561,7 +561,8 @@ bool PhraseLengthIndexLevel2::load(MemoryChunk * chunk,
table_offset_t offset,
table_offset_t end) {
char * buf_begin = (char *) chunk->begin();
- guint32 nindex = *((guint32 *)(buf_begin + offset));
+ guint32 nindex = chunk->get_content<guint32>(offset);
+
table_offset_t * index = (table_offset_t *)
(buf_begin + offset + sizeof(guint32));