diff options
-rw-r--r-- | src/include/memory_chunk.h | 34 | ||||
-rw-r--r-- | src/include/unaligned_memory.h | 43 | ||||
-rw-r--r-- | src/storage/chewing_large_table.cpp | 2 | ||||
-rw-r--r-- | src/storage/ngram.cpp | 6 | ||||
-rw-r--r-- | src/storage/phrase_index.cpp | 23 | ||||
-rw-r--r-- | src/storage/phrase_index.h | 13 | ||||
-rw-r--r-- | src/storage/phrase_large_table2.cpp | 3 |
7 files changed, 104 insertions, 20 deletions
diff --git a/src/include/memory_chunk.h b/src/include/memory_chunk.h index c106dba..baa5679 100644 --- a/src/include/memory_chunk.h +++ b/src/include/memory_chunk.h @@ -290,6 +290,25 @@ public: } /** + * MemoryChunk::set_content: + * @offset: the offset in this MemoryChunk. + * @data: the data to be copied. + * @returns: whether the data is copied successfully. + * + * Data are written directly to the memory area in this MemoryChunk. + * + */ + template <typename T> + bool set_content(size_t offset, T data){ + const size_t len = sizeof(data); + size_t cursize = std_lite::max(size(), offset + len); + ensure_has_space(offset + len); + memmove(m_data_begin + offset, &data, len); + m_data_end = m_data_begin + cursize; + return true; + } + + /** * MemoryChunk::append_content: * @data: the begin of the data to be copied. * @len: the length of the data to be copied. @@ -357,6 +376,21 @@ public: } /** + * MemoryChunk::get_content: + * @offset: the offset in this MemoryChunk. + * @returns: the content + * + * Get the content in this MemoryChunk. + * + */ + template <typename T> + T get_content(size_t offset) const { + T value; + memcpy(&value, m_data_begin + offset, sizeof(value)); + return value; + } + + /** * MemoryChunk::compact_memory: * * Compact memory, reduce the size. diff --git a/src/include/unaligned_memory.h b/src/include/unaligned_memory.h new file mode 100644 index 0000000..27b2f19 --- /dev/null +++ b/src/include/unaligned_memory.h @@ -0,0 +1,43 @@ +#ifndef UNALIGNED_MEMORY_H +#define UNALIGNED_MEMORY_H + +#include <cstring> + +/** + * UnalignedMemory: Safe unaligned memory access. + * + * Some instruction sets, or some instructions in some instruction sets + * require that memory access is aligned to a specific boundary. These + * instructions may trap on unaligned access. + * + * This class provides methods to load and store values at unaligned + * addresses. It ensures that the compiler doesn't generate instructions + * that could trap on the unaligned memory access. + */ + +namespace pinyin{ + template <typename T> + class UnalignedMemory{ + public: + /** + * Read a value from a possibly unaligned memory address. + * + */ + static T load(const void * src) { + T value; + memcpy(&value, src, sizeof(T)); + return value; + } + + /** + * Store a value into a possibly unaligned memory address. + * + */ + static void store(T value, void * dest) { + memcpy(dest, &value, sizeof(T)); + } + }; +}; + + +#endif diff --git a/src/storage/chewing_large_table.cpp b/src/storage/chewing_large_table.cpp index e856caf..bd76e9b 100644 --- a/src/storage/chewing_large_table.cpp +++ b/src/storage/chewing_large_table.cpp @@ -804,7 +804,7 @@ bool ChewingBitmapIndexLevel::store(MemoryChunk * new_chunk, bool ChewingLengthIndexLevel::load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end) { char * begin = (char *) chunk->begin(); - guint32 nindex = *((guint32 *)(begin + offset)); /* number of index */ + guint32 nindex = chunk->get_content<guint32>(offset); /* number of index */ table_offset_t * index = (table_offset_t *) (begin + offset + sizeof(guint32)); diff --git a/src/storage/ngram.cpp b/src/storage/ngram.cpp index c952768..e4bfe8f 100644 --- a/src/storage/ngram.cpp +++ b/src/storage/ngram.cpp @@ -46,14 +46,12 @@ SingleGram::SingleGram(void * buffer, size_t length, bool copy){ } bool SingleGram::get_total_freq(guint32 & total) const{ - char * buf_begin = (char *)m_chunk.begin(); - total = *((guint32 *)buf_begin); + total = m_chunk.get_content<guint32>(0); return true; } bool SingleGram::set_total_freq(guint32 total){ - char * buf_begin = (char *)m_chunk.begin(); - *((guint32 *)buf_begin) = total; + m_chunk.set_content<guint32>(0, total); return true; } diff --git a/src/storage/phrase_index.cpp b/src/storage/phrase_index.cpp index e9e9d5d..de864c2 100644 --- a/src/storage/phrase_index.cpp +++ b/src/storage/phrase_index.cpp @@ -20,6 +20,7 @@ #include "phrase_index.h" #include "pinyin_custom2.h" +#include "unaligned_memory.h" namespace pinyin{ @@ -61,10 +62,12 @@ bool PhraseItem::add_pronunciation(ChewingKey * keys, guint32 delta){ for (int i = 0; i < npron; ++i) { char * chewing_begin = buf_begin + offset + i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32)); - guint32 * freq = (guint32 *)(chewing_begin + - phrase_length * sizeof(ChewingKey)); + + guint32 * pfreq = (guint32 *)(chewing_begin + + phrase_length * sizeof(ChewingKey)); + guint32 freq = UnalignedMemory<guint32>::load(pfreq); - total_freq += *freq; + total_freq += freq; if (0 == pinyin_exact_compare2 (keys, (ChewingKey *)chewing_begin, phrase_length)) { @@ -74,8 +77,9 @@ bool PhraseItem::add_pronunciation(ChewingKey * keys, guint32 delta){ if (delta > 0 && total_freq > total_freq + delta) return false; - *freq += delta; + freq += delta; total_freq += delta; + UnalignedMemory<guint32>::store(freq, pfreq); return true; } } @@ -117,9 +121,11 @@ void PhraseItem::increase_pronunciation_possibility(ChewingKey * keys, for (int i = 0; i < npron; ++i) { char * chewing_begin = buf_begin + offset + i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32)); - guint32 * freq = (guint32 *)(chewing_begin + - phrase_length * sizeof(ChewingKey)); - total_freq += *freq; + + guint32 * pfreq = (guint32 *)(chewing_begin + + phrase_length * sizeof(ChewingKey)); + guint32 freq = UnalignedMemory<guint32>::load(pfreq); + total_freq += freq; if (0 == pinyin_compare_with_tones(keys, (ChewingKey *)chewing_begin, phrase_length)) { @@ -128,8 +134,9 @@ void PhraseItem::increase_pronunciation_possibility(ChewingKey * keys, if (delta > 0 && total_freq > total_freq + delta) return; - *freq += delta; + freq += delta; total_freq += delta; + UnalignedMemory<guint32>::store(freq, pfreq); } } } diff --git a/src/storage/phrase_index.h b/src/storage/phrase_index.h index 83dfb51..7e910ed 100644 --- a/src/storage/phrase_index.h +++ b/src/storage/phrase_index.h @@ -31,6 +31,7 @@ #include "memory_chunk.h" #include "phrase_index_logger.h" #include "table_info.h" +#include "unaligned_memory.h" /** * Phrase Index File Format @@ -121,8 +122,7 @@ public: * */ guint32 get_unigram_frequency(){ - char * buf_begin = (char *)m_chunk.begin(); - return (*(guint32 *)(buf_begin + sizeof(guint8) + sizeof(guint8))); + return m_chunk.get_content<guint32>(sizeof(guint8) + sizeof(guint8)); } /** @@ -142,12 +142,13 @@ public: for ( int i = 0 ; i < npron ; ++i){ char * chewing_begin = buf_begin + offset + i * (phrase_length * sizeof(ChewingKey) + sizeof(guint32)); - guint32 * freq = (guint32 *)(chewing_begin + - phrase_length * sizeof(ChewingKey)); - total_freq += *freq; + + guint32 freq = UnalignedMemory<guint32>::load(chewing_begin + + phrase_length * sizeof(ChewingKey)); + total_freq += freq; if ( 0 == pinyin_compare_with_tones(keys, (ChewingKey *)chewing_begin, phrase_length) ){ - matched += *freq; + matched += freq; } } diff --git a/src/storage/phrase_large_table2.cpp b/src/storage/phrase_large_table2.cpp index b72917c..28eb313 100644 --- a/src/storage/phrase_large_table2.cpp +++ b/src/storage/phrase_large_table2.cpp @@ -561,7 +561,8 @@ bool PhraseLengthIndexLevel2::load(MemoryChunk * chunk, table_offset_t offset, table_offset_t end) { char * buf_begin = (char *) chunk->begin(); - guint32 nindex = *((guint32 *)(buf_begin + offset)); + guint32 nindex = chunk->get_content<guint32>(offset); + table_offset_t * index = (table_offset_t *) (buf_begin + offset + sizeof(guint32)); |