first commit
commit
b9de678703
|
@ -0,0 +1,15 @@
|
|||
# qt虚拟输入法,可输入中文,可用于ARM-LINUX
|
||||
|
||||
1. 先编译googlepinyin(注意选择release),生成libgooglepinyin.a,将该文件放到plugin/googlepinyin文件夹下
|
||||
2. 编译plugin,生成libtgtsmlInputContextPlugin.so,(注意不要复制链接文件,终端输入ls -la查看链接文件),将该文件复制到开发板qt的plugins/platforminputcontexts文件夹下,若无platforminputcontexts文件夹,则手动建立
|
||||
3. 将plugin下的dict文件夹放到工程编译生成的APP同一目录下
|
||||
4. 在工程的main.cpp的主函数中添加qputenv("QT_IM_MODULE", QByteArray("tgtsml"));,注意放到QApplication a(argc, argv);之前
|
||||
5. 运行APP,点击文本框,即可弹出输入法
|
||||
|
||||
教程链接:
|
||||
|
||||
https://blog.csdn.net/qq_32605451/article/details/107705710
|
||||
|
||||
原链接:
|
||||
|
||||
https://gitee.com/smartwell/QtInputMethod_GooglePinyin?_from=gitee_search
|
|
@ -0,0 +1,269 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
/**
|
||||
* This class defines AtomDictBase class which is the base class for all atom
|
||||
* dictionaries. Atom dictionaries are managed by the decoder class
|
||||
* MatrixSearch.
|
||||
*
|
||||
* When the user appends a new character to the Pinyin string, all enabled atom
|
||||
* dictionaries' extend_dict() will be called at least once to get candidates
|
||||
* ended in this step (the information of starting step is also given in the
|
||||
* parameter). Usually, when extend_dict() is called, a MileStoneHandle object
|
||||
* returned by a previous call for an earlier step is given to speed up the
|
||||
* look-up process, and a new MileStoneHandle object will be returned if
|
||||
* the extension is successful.
|
||||
*
|
||||
* A returned MileStoneHandle object should keep alive until Function
|
||||
* reset_milestones() is called and this object is noticed to be reset.
|
||||
*
|
||||
* Usually, the atom dictionary can use step information to manage its
|
||||
* MileStoneHandle objects, or it can make the objects in ascendant order to
|
||||
* make the reset easier.
|
||||
*
|
||||
* When the decoder loads the dictionary, it will give a starting lemma id for
|
||||
* this atom dictionary to map an inner id to a global id. Global ids should be
|
||||
* used when an atom dictionary talks to any component outside.
|
||||
*/
|
||||
#ifndef PINYINIME_INCLUDE_ATOMDICTBASE_H__
|
||||
#define PINYINIME_INCLUDE_ATOMDICTBASE_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./dictdef.h"
|
||||
#include "./searchutility.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
class AtomDictBase {
|
||||
public:
|
||||
virtual ~AtomDictBase() {}
|
||||
|
||||
/**
|
||||
* Load an atom dictionary from a file.
|
||||
*
|
||||
* @param file_name The file name to load dictionary.
|
||||
* @param start_id The starting id used for this atom dictionary.
|
||||
* @param end_id The end id (included) which can be used for this atom
|
||||
* dictionary. User dictionary will always use the last id space, so it can
|
||||
* ignore this paramter. All other atom dictionaries should check this
|
||||
* parameter.
|
||||
* @return True if succeed.
|
||||
*/
|
||||
virtual bool load_dict(const char *file_name, LemmaIdType start_id,
|
||||
LemmaIdType end_id) = 0;
|
||||
|
||||
/**
|
||||
* Close this atom dictionary.
|
||||
*
|
||||
* @return True if succeed.
|
||||
*/
|
||||
virtual bool close_dict() = 0;
|
||||
|
||||
/**
|
||||
* Get the total number of lemmas in this atom dictionary.
|
||||
*
|
||||
* @return The total number of lemmas.
|
||||
*/
|
||||
virtual size_t number_of_lemmas() = 0;
|
||||
|
||||
/**
|
||||
* This function is called by the decoder when user deletes a character from
|
||||
* the input string, or begins a new input string.
|
||||
*
|
||||
* Different atom dictionaries may implement this function in different way.
|
||||
* an atom dictionary can use one of these two parameters (or both) to reset
|
||||
* its corresponding MileStoneHandle objects according its detailed
|
||||
* implementation.
|
||||
*
|
||||
* For example, if an atom dictionary uses step information to manage its
|
||||
* MileStoneHandle objects, parameter from_step can be used to identify which
|
||||
* objects should be reset; otherwise, if another atom dictionary does not
|
||||
* use the detailed step information, it only uses ascendant handles
|
||||
* (according to step. For the same step, earlier call, smaller handle), it
|
||||
* can easily reset those MileStoneHandle which are larger than from_handle.
|
||||
*
|
||||
* The decoder always reset the decoding state by step. So when it begins
|
||||
* resetting, it will call reset_milestones() of its atom dictionaries with
|
||||
* the step information, and the MileStoneHandle objects returned by the
|
||||
* earliest calling of extend_dict() for that step.
|
||||
*
|
||||
* If an atom dictionary does not implement incremental search, this function
|
||||
* can be totally ignored.
|
||||
*
|
||||
* @param from_step From which step(included) the MileStoneHandle
|
||||
* objects should be reset.
|
||||
* @param from_handle The ealiest MileStoneHandle object for step from_step
|
||||
*/
|
||||
virtual void reset_milestones(uint16 from_step,
|
||||
MileStoneHandle from_handle) = 0;
|
||||
|
||||
/**
|
||||
* Used to extend in this dictionary. The handle returned should keep valid
|
||||
* until reset_milestones() is called.
|
||||
*
|
||||
* @param from_handle Its previous returned extended handle without the new
|
||||
* spelling id, it can be used to speed up the extending.
|
||||
* @param dep The paramter used for extending.
|
||||
* @param lpi_items Used to fill in the lemmas matched.
|
||||
* @param lpi_max The length of the buffer
|
||||
* @param lpi_num Used to return the newly added items.
|
||||
* @return The new mile stone for this extending. 0 if fail.
|
||||
*/
|
||||
virtual MileStoneHandle extend_dict(MileStoneHandle from_handle,
|
||||
const DictExtPara *dep,
|
||||
LmaPsbItem *lpi_items,
|
||||
size_t lpi_max, size_t *lpi_num) = 0;
|
||||
|
||||
/**
|
||||
* Get lemma items with scores according to a spelling id stream.
|
||||
* This atom dictionary does not need to sort the returned items.
|
||||
*
|
||||
* @param splid_str The spelling id stream buffer.
|
||||
* @param splid_str_len The length of the spelling id stream buffer.
|
||||
* @param lpi_items Used to return matched lemma items with scores.
|
||||
* @param lpi_max The maximum size of the buffer to return result.
|
||||
* @return The number of matched items which have been filled in to lpi_items.
|
||||
*/
|
||||
virtual size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len,
|
||||
LmaPsbItem *lpi_items, size_t lpi_max) = 0;
|
||||
|
||||
/**
|
||||
* Get a lemma string (The Chinese string) by the given lemma id.
|
||||
*
|
||||
* @param id_lemma The lemma id to get the string.
|
||||
* @param str_buf The buffer to return the Chinese string.
|
||||
* @param str_max The maximum size of the buffer.
|
||||
* @return The length of the string, 0 if fail.
|
||||
*/
|
||||
virtual uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf,
|
||||
uint16 str_max) = 0;
|
||||
|
||||
/**
|
||||
* Get the full spelling ids for the given lemma id.
|
||||
* If the given buffer is too short, return 0.
|
||||
*
|
||||
* @param splids Used to return the spelling ids.
|
||||
* @param splids_max The maximum buffer length of splids.
|
||||
* @param arg_valid Used to indicate if the incoming parameters have been
|
||||
* initialized are valid. If it is true, the splids and splids_max are valid
|
||||
* and there may be half ids in splids to be updated to full ids. In this
|
||||
* case, splids_max is the number of valid ids in splids.
|
||||
* @return The number of ids in the buffer.
|
||||
*/
|
||||
virtual uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
|
||||
uint16 splids_max, bool arg_valid) = 0;
|
||||
|
||||
/**
|
||||
* Function used for prediction.
|
||||
* No need to sort the newly added items.
|
||||
*
|
||||
* @param last_hzs The last n Chinese chracters(called Hanzi), its length
|
||||
* should be less than or equal to kMaxPredictSize.
|
||||
* @param hzs_len specifies the length(<= kMaxPredictSize) of the history.
|
||||
* @param npre_items Used used to return the result.
|
||||
* @param npre_max The length of the buffer to return result
|
||||
* @param b4_used Number of prediction result (from npre_items[-b4_used])
|
||||
* from other atom dictionaries. A atom ditionary can just ignore it.
|
||||
* @return The number of prediction result from this atom dictionary.
|
||||
*/
|
||||
virtual size_t predict(const char16 last_hzs[], uint16 hzs_len,
|
||||
NPredictItem *npre_items, size_t npre_max,
|
||||
size_t b4_used) = 0;
|
||||
|
||||
/**
|
||||
* Add a lemma to the dictionary. If the dictionary allows to add new
|
||||
* items and this item does not exist, add it.
|
||||
*
|
||||
* @param lemma_str The Chinese string of the lemma.
|
||||
* @param splids The spelling ids of the lemma.
|
||||
* @param lemma_len The length of the Chinese lemma.
|
||||
* @param count The frequency count for this lemma.
|
||||
*/
|
||||
virtual LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[],
|
||||
uint16 lemma_len, uint16 count) = 0;
|
||||
|
||||
/**
|
||||
* Update a lemma's occuring count.
|
||||
*
|
||||
* @param lemma_id The lemma id to update.
|
||||
* @param delta_count The frequnecy count to ajust.
|
||||
* @param selected Indicate whether this lemma is selected by user and
|
||||
* submitted to target edit box.
|
||||
* @return The id if succeed, 0 if fail.
|
||||
*/
|
||||
virtual LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count,
|
||||
bool selected) = 0;
|
||||
|
||||
/**
|
||||
* Get the lemma id for the given lemma.
|
||||
*
|
||||
* @param lemma_str The Chinese string of the lemma.
|
||||
* @param splids The spelling ids of the lemma.
|
||||
* @param lemma_len The length of the lemma.
|
||||
* @return The matched lemma id, or 0 if fail.
|
||||
*/
|
||||
virtual LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[],
|
||||
uint16 lemma_len) = 0;
|
||||
|
||||
/**
|
||||
* Get the lemma score.
|
||||
*
|
||||
* @param lemma_id The lemma id to get score.
|
||||
* @return The score of the lemma, or 0 if fail.
|
||||
*/
|
||||
virtual LmaScoreType get_lemma_score(LemmaIdType lemma_id) = 0;
|
||||
|
||||
/**
|
||||
* Get the lemma score.
|
||||
*
|
||||
* @param lemma_str The Chinese string of the lemma.
|
||||
* @param splids The spelling ids of the lemma.
|
||||
* @param lemma_len The length of the lemma.
|
||||
* @return The score of the lamm, or 0 if fail.
|
||||
*/
|
||||
virtual LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[],
|
||||
uint16 lemma_len) = 0;
|
||||
|
||||
/**
|
||||
* If the dictionary allowed, remove a lemma from it.
|
||||
*
|
||||
* @param lemma_id The id of the lemma to remove.
|
||||
* @return True if succeed.
|
||||
*/
|
||||
virtual bool remove_lemma(LemmaIdType lemma_id) = 0;
|
||||
|
||||
/**
|
||||
* Get the total occuring count of this atom dictionary.
|
||||
*
|
||||
* @return The total occuring count of this atom dictionary.
|
||||
*/
|
||||
virtual size_t get_total_lemma_count() = 0;
|
||||
|
||||
/**
|
||||
* Set the total occuring count of other atom dictionaries.
|
||||
*
|
||||
* @param count The total occuring count of other atom dictionaies.
|
||||
*/
|
||||
virtual void set_total_lemma_count_of_others(size_t count) = 0;
|
||||
|
||||
/**
|
||||
* Notify this atom dictionary to flush the cached data to persistent storage
|
||||
* if necessary.
|
||||
*/
|
||||
virtual void flush_cache() = 0;
|
||||
};
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_ATOMDICTBASE_H__
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,171 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_DICTBUILDER_H__
|
||||
#define PINYINIME_INCLUDE_DICTBUILDER_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./utf16char.h"
|
||||
#include "./dictdef.h"
|
||||
#include "./dictlist.h"
|
||||
#include "./spellingtable.h"
|
||||
#include "./spellingtrie.h"
|
||||
#include "./splparser.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
|
||||
#define ___DO_STATISTICS___
|
||||
|
||||
class DictTrie;
|
||||
|
||||
class DictBuilder {
|
||||
private:
|
||||
// The raw lemma array buffer.
|
||||
LemmaEntry *lemma_arr_;
|
||||
size_t lemma_num_;
|
||||
|
||||
// Used to store all possible single char items.
|
||||
// Two items may have the same Hanzi while their spelling ids are different.
|
||||
SingleCharItem *scis_;
|
||||
size_t scis_num_;
|
||||
|
||||
// In the tree, root's level is -1.
|
||||
// Lemma nodes for root, and level 0
|
||||
LmaNodeLE0 *lma_nodes_le0_;
|
||||
|
||||
// Lemma nodes for layers whose levels are deeper than 0
|
||||
LmaNodeGE1 *lma_nodes_ge1_;
|
||||
|
||||
// Number of used lemma nodes
|
||||
size_t lma_nds_used_num_le0_;
|
||||
size_t lma_nds_used_num_ge1_;
|
||||
|
||||
// Used to store homophonies' ids.
|
||||
LemmaIdType *homo_idx_buf_;
|
||||
// Number of homophonies each of which only contains one Chinese character.
|
||||
size_t homo_idx_num_eq1_;
|
||||
// Number of homophonies each of which contains more than one character.
|
||||
size_t homo_idx_num_gt1_;
|
||||
|
||||
// The items with highest scores.
|
||||
LemmaEntry *top_lmas_;
|
||||
size_t top_lmas_num_;
|
||||
|
||||
SpellingTable *spl_table_;
|
||||
SpellingParser *spl_parser_;
|
||||
|
||||
#ifdef ___DO_STATISTICS___
|
||||
size_t max_sonbuf_len_[kMaxLemmaSize];
|
||||
size_t max_homobuf_len_[kMaxLemmaSize];
|
||||
|
||||
size_t total_son_num_[kMaxLemmaSize];
|
||||
size_t total_node_hasson_[kMaxLemmaSize];
|
||||
size_t total_sonbuf_num_[kMaxLemmaSize];
|
||||
size_t total_sonbuf_allnoson_[kMaxLemmaSize];
|
||||
size_t total_node_in_sonbuf_allnoson_[kMaxLemmaSize];
|
||||
size_t total_homo_num_[kMaxLemmaSize];
|
||||
|
||||
size_t sonbufs_num1_; // Number of son buffer with only 1 son
|
||||
size_t sonbufs_numgt1_; // Number of son buffer with more 1 son;
|
||||
|
||||
size_t total_lma_node_num_;
|
||||
|
||||
void stat_init();
|
||||
void stat_print();
|
||||
#endif
|
||||
|
||||
public:
|
||||
|
||||
DictBuilder();
|
||||
~DictBuilder();
|
||||
|
||||
// Build dictionary trie from the file fn_raw. File fn_validhzs provides
|
||||
// valid chars. If fn_validhzs is NULL, only chars in GB2312 will be
|
||||
// included.
|
||||
bool build_dict(const char* fn_raw, const char* fn_validhzs,
|
||||
DictTrie *dict_trie);
|
||||
|
||||
private:
|
||||
// Fill in the buffer with id. The caller guarantees that the paramters are
|
||||
// vaild.
|
||||
void id_to_charbuf(unsigned char *buf, LemmaIdType id);
|
||||
|
||||
// Update the offset of sons for a node.
|
||||
void set_son_offset(LmaNodeGE1 *node, size_t offset);
|
||||
|
||||
// Update the offset of homophonies' ids for a node.
|
||||
void set_homo_id_buf_offset(LmaNodeGE1 *node, size_t offset);
|
||||
|
||||
// Format a speling string.
|
||||
void format_spelling_str(char *spl_str);
|
||||
|
||||
// Sort the lemma_arr by the hanzi string, and give each of unique items
|
||||
// a id. Why we need to sort the lemma list according to their Hanzi string
|
||||
// is to find items started by a given prefix string to do prediction.
|
||||
// Actually, the single char items are be in other order, for example,
|
||||
// in spelling id order, etc.
|
||||
// Return value is next un-allocated idx available.
|
||||
LemmaIdType sort_lemmas_by_hz();
|
||||
|
||||
// Build the SingleCharItem list, and fill the hanzi_scis_ids in the
|
||||
// lemma buffer lemma_arr_.
|
||||
// This function should be called after the lemma array is ready.
|
||||
// Return the number of unique SingleCharItem elements.
|
||||
size_t build_scis();
|
||||
|
||||
// Construct a subtree using a subset of the spelling array (from
|
||||
// item_star to item_end)
|
||||
// parent is the parent node to update the necessary information
|
||||
// parent can be a member of LmaNodeLE0 or LmaNodeGE1
|
||||
bool construct_subset(void* parent, LemmaEntry* lemma_arr,
|
||||
size_t item_start, size_t item_end, size_t level);
|
||||
|
||||
|
||||
// Read valid Chinese Hanzis from the given file.
|
||||
// num is used to return number of chars.
|
||||
// The return buffer is sorted and caller needs to free the returned buffer.
|
||||
char16* read_valid_hanzis(const char *fn_validhzs, size_t *num);
|
||||
|
||||
|
||||
// Read a raw dictionary. max_item is the maximum number of items. If there
|
||||
// are more items in the ditionary, only the first max_item will be read.
|
||||
// Returned value is the number of items successfully read from the file.
|
||||
size_t read_raw_dict(const char* fn_raw, const char *fn_validhzs,
|
||||
size_t max_item);
|
||||
|
||||
// Try to find if a character is in hzs buffer.
|
||||
bool hz_in_hanzis_list(const char16 *hzs, size_t hzs_len, char16 hz);
|
||||
|
||||
// Try to find if all characters in str are in hzs buffer.
|
||||
bool str_in_hanzis_list(const char16 *hzs, size_t hzs_len,
|
||||
const char16 *str, size_t str_len);
|
||||
|
||||
// Get these lemmas with toppest scores.
|
||||
void get_top_lemmas();
|
||||
|
||||
// Allocate resource to build dictionary.
|
||||
// lma_num is the number of items to be loaded
|
||||
bool alloc_resource(size_t lma_num);
|
||||
|
||||
// Free resource.
|
||||
void free_resource();
|
||||
};
|
||||
#endif // ___BUILD_MODEL___
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_DICTBUILDER_H__
|
|
@ -0,0 +1,157 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_DICTDEF_H__
|
||||
#define PINYINIME_INCLUDE_DICTDEF_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./utf16char.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Enable the following line when building the binary dictionary model.
|
||||
// #define ___BUILD_MODEL___
|
||||
|
||||
// Unsigned integer aliases used throughout the engine.
typedef unsigned char uint8;
typedef unsigned short uint16;
typedef unsigned int uint32;

// Signed integer aliases, followed by the 64-bit pair.
typedef signed char int8;
typedef short int16;
typedef int int32;
typedef long long int64;
typedef unsigned long long uint64;

// Debug print switches, all disabled.
const bool kPrintDebug0 = false;
const bool kPrintDebug1 = false;
const bool kPrintDebug2 = false;

// The max length of a lemma.
const size_t kMaxLemmaSize = 8;

// The max length of a Pinyin (spelling).
const size_t kMaxPinyinSize = 6;

// The number of half spelling ids. The original note says that Chinese
// Pinyin has 30 half ids; see SpellingTrie.h for details.
// NOTE(review): the constant is 29 while the note says 30 -- presumably one
// id is reserved and not counted; confirm against SpellingTrie.h.
const size_t kHalfSpellingIdNum = 29;

// The maximum number of full spellings. For Chinese Pinyin there are only
// about 410 spellings.
// If this value is made bigger (needing more bits), also update the other
// structures, like SpellingNode, to make sure that a spelling id can still
// be stored.
// The -1 is because 0 is never used.
const size_t kMaxSpellingNum = 512 - kHalfSpellingIdNum - 1;
const size_t kMaxSearchSteps = 40;

// One character predicts its following characters.
const size_t kMaxPredictSize = kMaxLemmaSize - 1;

// LemmaIdType must always be size_t.
typedef size_t LemmaIdType;
const size_t kLemmaIdSize = 3;  // An id actually occupies 3 bytes in storage.
const size_t kLemmaIdComposing = 0xffffff;

typedef uint16 LmaScoreType;
typedef uint16 KeyScoreType;

// Number of highest-score items that are kept for prediction purposes.
const size_t kTopScoreLemmaNum = 10;

const size_t kMaxPredictNumByGt3 = 1;
const size_t kMaxPredictNumBy3 = 2;
const size_t kMaxPredictNumBy2 = 2;

// The last lemma id (inclusive) for the system dictionary. The system
// dictionary's ids always start from 1.
const LemmaIdType kSysDictIdEnd = 500000;

// The first lemma id for the user dictionary: the id right after the system
// dictionary's range (500001).
const LemmaIdType kUserDictIdStart = kSysDictIdEnd + 1;

// The last lemma id (inclusive) for the user dictionary.
const LemmaIdType kUserDictIdEnd = 600000;
|
||||
|
||||
typedef struct {
|
||||
uint16 half_splid:5;
|
||||
uint16 full_splid:11;
|
||||
} SpellingId, *PSpellingId;
|
||||
|
||||
|
||||
/**
|
||||
* We use different node types for different layers
|
||||
* Statistical data of the building result for a testing dictionary:
|
||||
* root, level 0, level 1, level 2, level 3
|
||||
* max son num of one node: 406 280 41 2 -
|
||||
* max homo num of one node: 0 90 23 2 2
|
||||
* total node num of a layer: 1 406 31766 13516 993
|
||||
* total homo num of a layer: 9 5674 44609 12667 995
|
||||
*
|
||||
* The node number for root and level 0 won't be larger than 500
|
||||
* According to the information above, two kinds of nodes can be used; one for
|
||||
* root and level 0, the other for these layers deeper than 0.
|
||||
*
|
||||
* LE = less and equal,
|
||||
* A node occupies 16 bytes. so, totallly less than 16 * 500 = 8K
|
||||
*/
|
||||
struct LmaNodeLE0 {
|
||||
uint32 son_1st_off;
|
||||
uint32 homo_idx_buf_off;
|
||||
uint16 spl_idx;
|
||||
uint16 num_of_son;
|
||||
uint16 num_of_homo;
|
||||
};
|
||||
|
||||
/**
|
||||
* GE = great and equal
|
||||
* A node occupies 8 bytes.
|
||||
*/
|
||||
struct LmaNodeGE1 {
|
||||
uint16 son_1st_off_l; // Low bits of the son_1st_off
|
||||
uint16 homo_idx_buf_off_l; // Low bits of the homo_idx_buf_off_1
|
||||
uint16 spl_idx;
|
||||
unsigned char num_of_son; // number of son nodes
|
||||
unsigned char num_of_homo; // number of homo words
|
||||
unsigned char son_1st_off_h; // high bits of the son_1st_off
|
||||
unsigned char homo_idx_buf_off_h; // high bits of the homo_idx_buf_off
|
||||
};
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
struct SingleCharItem {
|
||||
float freq;
|
||||
char16 hz;
|
||||
SpellingId splid;
|
||||
};
|
||||
|
||||
struct LemmaEntry {
|
||||
LemmaIdType idx_by_py;
|
||||
LemmaIdType idx_by_hz;
|
||||
char16 hanzi_str[kMaxLemmaSize + 1];
|
||||
|
||||
// The SingleCharItem id for each Hanzi.
|
||||
uint16 hanzi_scis_ids[kMaxLemmaSize];
|
||||
|
||||
uint16 spl_idx_arr[kMaxLemmaSize + 1];
|
||||
char pinyin_str[kMaxLemmaSize][kMaxPinyinSize + 1];
|
||||
unsigned char hz_str_len;
|
||||
float freq;
|
||||
};
|
||||
#endif // ___BUILD_MODEL___
|
||||
|
||||
} // namespace ime_pinyin
|
||||
|
||||
#endif // PINYINIME_INCLUDE_DICTDEF_H__
|
|
@ -0,0 +1,446 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include "dictlist.h"
|
||||
#include "mystdlib.h"
|
||||
#include "ngram.h"
|
||||
#include "searchutility.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Creates an empty, uninitialized list: no buffers are allocated, and the
// Hanzi comparator table is filled in.
DictList::DictList() {
  typedef int (*HanziCmpFunc)(const void *, const void *);
  // Entry i is the comparator installed in cmp_func_[i].
  const HanziCmpFunc hanzi_cmps[] = {
    cmp_hanzis_1, cmp_hanzis_2, cmp_hanzis_3, cmp_hanzis_4,
    cmp_hanzis_5, cmp_hanzis_6, cmp_hanzis_7, cmp_hanzis_8
  };

  initialized_ = false;
  scis_num_ = 0;
  buf_ = NULL;
  scis_hz_ = NULL;
  scis_splid_ = NULL;
  spl_trie_ = SpellingTrie::get_cpinstance();

  // The table above has exactly eight entries.
  assert(kMaxLemmaSize == 8);
  for (size_t slot = 0; slot < 8; slot++)
    cmp_func_[slot] = hanzi_cmps[slot];
}
|
||||
|
||||
// Releases every buffer owned by the list.
DictList::~DictList() {
  this->free_resource();
}
|
||||
|
||||
// Allocates the three buffers of the list: buf_ (buf_size char16 units),
// scis_hz_ and scis_splid_ (scis_num entries each). The allocations are
// attempted in that order and the first failure stops the sequence.
// scis_num_ is recorded once buf_ has been allocated.
// Returns true only if all three allocations succeeded.
bool DictList::alloc_resource(size_t buf_size, size_t scis_num) {
  buf_ = static_cast<char16*>(malloc(buf_size * sizeof(char16)));
  if (NULL != buf_) {
    scis_num_ = scis_num;

    scis_hz_ = static_cast<char16*>(malloc(scis_num_ * sizeof(char16)));
    if (NULL != scis_hz_) {
      scis_splid_ = static_cast<SpellingId*>
          (malloc(scis_num_ * sizeof(SpellingId)));
      return NULL != scis_splid_;
    }
  }

  return false;
}
|
||||
|
||||
// Frees the three buffers and resets their pointers to NULL, so the call is
// safe to repeat.
void DictList::free_resource() {
  // free() accepts a null pointer and does nothing, so no checks are needed.
  free(buf_);
  free(scis_hz_);
  free(scis_splid_);

  buf_ = NULL;
  scis_hz_ = NULL;
  scis_splid_ = NULL;
}
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
bool DictList::init_list(const SingleCharItem *scis, size_t scis_num,
|
||||
const LemmaEntry *lemma_arr, size_t lemma_num) {
|
||||
if (NULL == scis || 0 == scis_num || NULL == lemma_arr || 0 == lemma_num)
|
||||
return false;
|
||||
|
||||
initialized_ = false;
|
||||
|
||||
if (NULL != buf_)
|
||||
free(buf_);
|
||||
|
||||
// calculate the size
|
||||
size_t buf_size = calculate_size(lemma_arr, lemma_num);
|
||||
if (0 == buf_size)
|
||||
return false;
|
||||
|
||||
if (!alloc_resource(buf_size, scis_num))
|
||||
return false;
|
||||
|
||||
fill_scis(scis, scis_num);
|
||||
|
||||
// Copy the related content from the array to inner buffer
|
||||
fill_list(lemma_arr, lemma_num);
|
||||
|
||||
initialized_ = true;
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t DictList::calculate_size(const LemmaEntry* lemma_arr, size_t lemma_num) {
|
||||
size_t last_hz_len = 0;
|
||||
size_t list_size = 0;
|
||||
size_t id_num = 0;
|
||||
|
||||
for (size_t i = 0; i < lemma_num; i++) {
|
||||
if (0 == i) {
|
||||
last_hz_len = lemma_arr[i].hz_str_len;
|
||||
|
||||
assert(last_hz_len > 0);
|
||||
assert(lemma_arr[0].idx_by_hz == 1);
|
||||
|
||||
id_num++;
|
||||
start_pos_[0] = 0;
|
||||
start_id_[0] = id_num;
|
||||
|
||||
last_hz_len = 1;
|
||||
list_size += last_hz_len;
|
||||
} else {
|
||||
size_t current_hz_len = lemma_arr[i].hz_str_len;
|
||||
|
||||
assert(current_hz_len >= last_hz_len);
|
||||
|
||||
if (current_hz_len == last_hz_len) {
|
||||
list_size += current_hz_len;
|
||||
id_num++;
|
||||
} else {
|
||||
for (size_t len = last_hz_len; len < current_hz_len - 1; len++) {
|
||||
start_pos_[len] = start_pos_[len - 1];
|
||||
start_id_[len] = start_id_[len - 1];
|
||||
}
|
||||
|
||||
start_pos_[current_hz_len - 1] = list_size;
|
||||
|
||||
id_num++;
|
||||
start_id_[current_hz_len - 1] = id_num;
|
||||
|
||||
last_hz_len = current_hz_len;
|
||||
list_size += current_hz_len;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
for (size_t i = last_hz_len; i <= kMaxLemmaSize; i++) {
|
||||
if (0 == i) {
|
||||
start_pos_[0] = 0;
|
||||
start_id_[0] = 1;
|
||||
} else {
|
||||
start_pos_[i] = list_size;
|
||||
start_id_[i] = id_num;
|
||||
}
|
||||
}
|
||||
|
||||
return start_pos_[kMaxLemmaSize];
|
||||
}
|
||||
|
||||
void DictList::fill_scis(const SingleCharItem *scis, size_t scis_num) {
|
||||
assert(scis_num_ == scis_num);
|
||||
|
||||
for (size_t pos = 0; pos < scis_num_; pos++) {
|
||||
scis_hz_[pos] = scis[pos].hz;
|
||||
scis_splid_[pos] = scis[pos].splid;
|
||||
}
|
||||
}
|
||||
|
||||
void DictList::fill_list(const LemmaEntry* lemma_arr, size_t lemma_num) {
|
||||
size_t current_pos = 0;
|
||||
|
||||
utf16_strncpy(buf_, lemma_arr[0].hanzi_str,
|
||||
lemma_arr[0].hz_str_len);
|
||||
|
||||
current_pos = lemma_arr[0].hz_str_len;
|
||||
|
||||
size_t id_num = 1;
|
||||
|
||||
for (size_t i = 1; i < lemma_num; i++) {
|
||||
utf16_strncpy(buf_ + current_pos, lemma_arr[i].hanzi_str,
|
||||
lemma_arr[i].hz_str_len);
|
||||
|
||||
id_num++;
|
||||
current_pos += lemma_arr[i].hz_str_len;
|
||||
}
|
||||
|
||||
assert(current_pos == start_pos_[kMaxLemmaSize]);
|
||||
assert(id_num == start_id_[kMaxLemmaSize]);
|
||||
}
|
||||
|
||||
// Finds the first two-character word in buf_ whose first character is
// hz_char.
//
// The two-character section [start_pos_[1], start_pos_[2]) is searched with
// mybsearch using cmp_hanzis_1 and an element size of two char16 units.
// Returns a pointer to the first matching word, or NULL if there is none.
char16* DictList::find_pos2_startedbyhz(char16 hz_char) {
  char16 *found_2w = static_cast<char16*>
                     (mybsearch(&hz_char, buf_ + start_pos_[1],
                                (start_pos_[2] - start_pos_[1]) / 2,
                                sizeof(char16) * 2, cmp_hanzis_1));
  if (NULL == found_2w)
    return NULL;

  // The search may land on any word of the matching run; step back one
  // whole word (two char16 units) at a time while the previous word starts
  // with the same character. The previous word's FIRST character is at
  // found_2w - 2; found_2w - 1 would be its second character.
  while (found_2w > buf_ + start_pos_[1] && *found_2w == *(found_2w - 2))
    found_2w -= 2;

  return found_2w;
}
|
||||
#endif // ___BUILD_MODEL___
|
||||
|
||||
// Finds the first word of word_len characters that compares equal to
// last_hzs under cmp_func.
//
// last_hzs  The key passed to the search.
// word_len  Length, in char16 units, of the words in the section searched.
// cmp_func  Comparator used both for the search and for rewinding.
// Returns a pointer to the first matching word in buf_, or NULL if none.
char16* DictList::find_pos_startedbyhzs(const char16 last_hzs[],
    size_t word_len, int (*cmp_func)(const void *, const void *)) {
  // The section holding the words of this length starts here.
  char16 *const section_begin = buf_ + start_pos_[word_len - 1];
  const size_t word_count =
      (start_pos_[word_len] - start_pos_[word_len - 1]) / word_len;

  char16 *match = static_cast<char16*>(
      mybsearch(last_hzs, section_begin, word_count,
                sizeof(char16) * word_len, cmp_func));
  if (NULL == match)
    return NULL;

  // Rewind to the first word of the run of equal matches.
  for (;;) {
    if (match <= section_begin)
      break;
    if (cmp_func(match, match - word_len) != 0)
      break;
    match -= word_len;
  }

  return match;
}
|
||||
|
||||
size_t DictList::predict(const char16 last_hzs[], uint16 hzs_len,
|
||||
NPredictItem *npre_items, size_t npre_max,
|
||||
size_t b4_used) {
|
||||
assert(hzs_len <= kMaxPredictSize && hzs_len > 0);
|
||||
|
||||
// 1. Prepare work
|
||||
int (*cmp_func)(const void *, const void *) = cmp_func_[hzs_len - 1];
|
||||
|
||||
NGram& ngram = NGram::get_instance();
|
||||
|
||||
size_t item_num = 0;
|
||||
|
||||
// 2. Do prediction
|
||||
for (uint16 pre_len = 1; pre_len <= kMaxPredictSize + 1 - hzs_len;
|
||||
pre_len++) {
|
||||
uint16 word_len = hzs_len + pre_len;
|
||||
char16 *w_buf = find_pos_startedbyhzs(last_hzs, word_len, cmp_func);
|
||||
if (NULL == w_buf)
|
||||
continue;
|
||||
while (w_buf < buf_ + start_pos_[word_len] &&
|
||||
cmp_func(w_buf, last_hzs) == 0 &&
|
||||
item_num < npre_max) {
|
||||
memset(npre_items + item_num, 0, sizeof(NPredictItem));
|
||||
utf16_strncpy(npre_items[item_num].pre_hzs, w_buf + hzs_len, pre_len);
|
||||
npre_items[item_num].psb =
|
||||
ngram.get_uni_psb((size_t)(w_buf - buf_ - start_pos_[word_len - 1])
|
||||
/ word_len + start_id_[word_len - 1]);
|
||||
npre_items[item_num].his_len = hzs_len;
|
||||
item_num++;
|
||||
w_buf += word_len;
|
||||
}
|
||||
}
|
||||
|
||||
size_t new_num = 0;
|
||||
for (size_t i = 0; i < item_num; i++) {
|
||||
// Try to find it in the existing items
|
||||
size_t e_pos;
|
||||
for (e_pos = 1; e_pos <= b4_used; e_pos++) {
|
||||
if (utf16_strncmp((*(npre_items - e_pos)).pre_hzs, npre_items[i].pre_hzs,
|
||||
kMaxPredictSize) == 0)
|
||||
break;
|
||||
}
|
||||
if (e_pos <= b4_used)
|
||||
continue;
|
||||
|
||||
// If not found, append it to the buffer
|
||||
npre_items[new_num] = npre_items[i];
|
||||
new_num++;
|
||||
}
|
||||
|
||||
return new_num;
|
||||
}
|
||||
|
||||
// Copy the hanzi string of lemma id_lemma into str_buf (NUL terminated).
//
// The lemma length is found by locating the range
// [start_id_[len - 1], start_id_[len]) that contains the id.
//
// Returns the number of characters written (excluding the terminator), or 0
// when the list is not initialized, the id is out of range, str_buf is NULL,
// or str_max is too small to hold the string plus its terminator.
uint16 DictList::get_lemma_str(LemmaIdType id_lemma, char16 *str_buf,
                               uint16 str_max) {
  if (!initialized_)
    return 0;
  if (id_lemma >= start_id_[kMaxLemmaSize])
    return 0;
  if (NULL == str_buf || str_max <= 1)
    return 0;

  for (uint16 len = 1; len <= kMaxLemmaSize; len++) {
    // A word of len characters needs len + 1 slots.
    if (len >= str_max)
      return 0;

    const bool in_range =
        start_id_[len - 1] <= id_lemma && start_id_[len] > id_lemma;
    if (!in_range)
      continue;

    const size_t index_in_range = id_lemma - start_id_[len - 1];
    const char16 *src = buf_ + start_pos_[len - 1] + index_in_range * len;
    for (uint16 k = 0; k < len; k++)
      str_buf[k] = src[k];
    str_buf[len] = (char16)'\0';
    return len;
  }
  return 0;
}
|
||||
|
||||
// Collect the full spelling ids recorded for the character hanzi.
//
// hanzi      - the character to look up in scis_hz_.
// half_splid - when 0, every spelling id of the character is returned.
//              Otherwise only entries whose half_splid equals it are
//              returned; if no entry matches exactly, entries accepted by
//              spl_trie_->half_full_compatible() are returned instead.
// splids     - output buffer.
// max_splids - capacity of splids. Callers are expected to leave one spare
//              slot (checked by assert in debug builds).
//
// Returns the number of ids written to splids; 0 if the character is not
// found in the table.
uint16 DictList::get_splids_for_hanzi(char16 hanzi, uint16 half_splid,
                                      uint16 *splids, uint16 max_splids) {
  char16 *hz_found = static_cast<char16*>
      (mybsearch(&hanzi, scis_hz_, scis_num_, sizeof(char16), cmp_hanzis_1));
  assert(NULL != hz_found && hanzi == *hz_found);
  // The assert vanishes under NDEBUG; do not dereference NULL in release
  // builds when the character is unknown.
  if (NULL == hz_found)
    return 0;

  // Move to the first one.
  while (hz_found > scis_hz_ && hanzi == *(hz_found - 1))
    hz_found--;

  // First pass: decide whether an exact half-id match exists.
  char16 *hz_f = hz_found;
  bool strict = false;
  while (hz_f < scis_hz_ + scis_num_ && hanzi == *hz_f) {
    uint16 pos = hz_f - scis_hz_;
    if (0 == half_splid || scis_splid_[pos].half_splid == half_splid) {
      strict = true;
    }
    hz_f++;
  }

  // Second pass: emit the ids selected by the chosen matching mode.
  uint16 found_num = 0;
  while (hz_found < scis_hz_ + scis_num_ && hanzi == *hz_found) {
    uint16 pos = hz_found - scis_hz_;
    if (0 == half_splid ||
        (strict && scis_splid_[pos].half_splid == half_splid) ||
        (!strict && spl_trie_->half_full_compatible(half_splid,
        scis_splid_[pos].full_splid))) {
      assert(found_num + 1 < max_splids);
      // Release-build guard: never write past the end of splids.
      if (found_num >= max_splids)
        break;
      splids[found_num] = scis_splid_[pos].full_splid;
      found_num++;
    }
    hz_found++;
  }

  return found_num;
}
|
||||
|
||||
// Look up the lemma id of the hanzi string str of str_len characters.
//
// Returns 0 when str is NULL, str_len is 0 or larger than kMaxLemmaSize, or
// the string is not in the list.
LemmaIdType DictList::get_lemma_id(const char16 *str, uint16 str_len) {
  // str_len == 0 must be rejected: it would index cmp_func_[-1] and
  // start_pos_[-1] below.
  if (NULL == str || 0 == str_len || str_len > kMaxLemmaSize)
    return 0;

  char16 *found = find_pos_startedbyhzs(str, str_len, cmp_func_[str_len - 1]);
  if (NULL == found)
    return 0;

  // The search range starts at buf_ + start_pos_[str_len - 1], so a match at
  // offset 0 (found == buf_) is legitimate when that start position is 0.
  assert(found >= buf_);
  assert(static_cast<size_t>(found - buf_) >= start_pos_[str_len - 1]);

  // id = first id of this length + index of the word inside its range.
  return static_cast<LemmaIdType>
      (start_id_[str_len - 1] +
       (found - buf_ - start_pos_[str_len - 1]) / str_len);
}
|
||||
|
||||
// Replace, in place, each of the first str_len elements of str by the
// scis_hz_ entry it indexes. No range check is done on the index values.
void DictList::convert_to_hanzis(char16 *str, uint16 str_len) {
  assert(NULL != str);

  char16 *const end = str + str_len;
  for (char16 *cur = str; cur < end; ++cur)
    *cur = scis_hz_[*cur];
}
|
||||
|
||||
// Overwrite the first str_len elements of str with the constant 0x100.
// NOTE(review): despite the name, no hanzi -> id lookup is performed here;
// this looks like a placeholder implementation -- confirm before relying
// on it.
void DictList::convert_to_scis_ids(char16 *str, uint16 str_len) {
  assert(NULL != str);

  char16 *const end = str + str_len;
  for (char16 *cur = str; cur < end; ++cur)
    *cur = 0x100;
}
|
||||
|
||||
bool DictList::save_list(FILE *fp) {
|
||||
if (!initialized_ || NULL == fp)
|
||||
return false;
|
||||
|
||||
if (NULL == buf_ || 0 == start_pos_[kMaxLemmaSize] ||
|
||||
NULL == scis_hz_ || NULL == scis_splid_ || 0 == scis_num_)
|
||||
return false;
|
||||
|
||||
if (fwrite(&scis_num_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(start_pos_, sizeof(uint32), kMaxLemmaSize + 1, fp) !=
|
||||
kMaxLemmaSize + 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(start_id_, sizeof(uint32), kMaxLemmaSize + 1, fp) !=
|
||||
kMaxLemmaSize + 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(scis_hz_, sizeof(char16), scis_num_, fp) != scis_num_)
|
||||
return false;
|
||||
|
||||
if (fwrite(scis_splid_, sizeof(SpellingId), scis_num_, fp) != scis_num_)
|
||||
return false;
|
||||
|
||||
if (fwrite(buf_, sizeof(char16), start_pos_[kMaxLemmaSize], fp) !=
|
||||
start_pos_[kMaxLemmaSize])
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DictList::load_list(FILE *fp) {
|
||||
if (NULL == fp)
|
||||
return false;
|
||||
|
||||
initialized_ = false;
|
||||
|
||||
if (fread(&scis_num_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fread(start_pos_, sizeof(uint32), kMaxLemmaSize + 1, fp) !=
|
||||
kMaxLemmaSize + 1)
|
||||
return false;
|
||||
|
||||
if (fread(start_id_, sizeof(uint32), kMaxLemmaSize + 1, fp) !=
|
||||
kMaxLemmaSize + 1)
|
||||
return false;
|
||||
|
||||
free_resource();
|
||||
|
||||
if (!alloc_resource(start_pos_[kMaxLemmaSize], scis_num_))
|
||||
return false;
|
||||
|
||||
if (fread(scis_hz_, sizeof(char16), scis_num_, fp) != scis_num_)
|
||||
return false;
|
||||
|
||||
if (fread(scis_splid_, sizeof(SpellingId), scis_num_, fp) != scis_num_)
|
||||
return false;
|
||||
|
||||
if (fread(buf_, sizeof(char16), start_pos_[kMaxLemmaSize], fp) !=
|
||||
start_pos_[kMaxLemmaSize])
|
||||
return false;
|
||||
|
||||
initialized_ = true;
|
||||
return true;
|
||||
}
|
||||
} // namespace ime_pinyin
|
|
@ -0,0 +1,120 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_DICTLIST_H__
|
||||
#define PINYINIME_INCLUDE_DICTLIST_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include "./dictdef.h"
|
||||
#include "./searchutility.h"
|
||||
#include "./spellingtrie.h"
|
||||
#include "./utf16char.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
class DictList {
|
||||
private:
|
||||
bool initialized_;
|
||||
|
||||
const SpellingTrie *spl_trie_;
|
||||
|
||||
// Number of SingCharItem. The first is blank, because id 0 is invalid.
|
||||
uint32 scis_num_;
|
||||
char16 *scis_hz_;
|
||||
SpellingId *scis_splid_;
|
||||
|
||||
// The large memory block to store the word list.
|
||||
char16 *buf_;
|
||||
|
||||
// Starting position of those words whose lengths are i+1, counted in
|
||||
// char16
|
||||
uint32 start_pos_[kMaxLemmaSize + 1];
|
||||
|
||||
uint32 start_id_[kMaxLemmaSize + 1];
|
||||
|
||||
int (*cmp_func_[kMaxLemmaSize])(const void *, const void *);
|
||||
|
||||
bool alloc_resource(size_t buf_size, size_t scim_num);
|
||||
|
||||
void free_resource();
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
// Calculate the requsted memory, including the start_pos[] buffer.
|
||||
size_t calculate_size(const LemmaEntry *lemma_arr, size_t lemma_num);
|
||||
|
||||
void fill_scis(const SingleCharItem *scis, size_t scis_num);
|
||||
|
||||
// Copy the related content to the inner buffer
|
||||
// It should be called after calculate_size()
|
||||
void fill_list(const LemmaEntry *lemma_arr, size_t lemma_num);
|
||||
|
||||
// Find the starting position for the buffer of those 2-character Chinese word
|
||||
// whose first character is the given Chinese character.
|
||||
char16* find_pos2_startedbyhz(char16 hz_char);
|
||||
#endif
|
||||
|
||||
// Find the starting position for the buffer of those words whose lengths are
|
||||
// word_len. The given parameter cmp_func decides how many characters from
|
||||
// beginning will be used to compare.
|
||||
char16* find_pos_startedbyhzs(const char16 last_hzs[],
|
||||
size_t word_Len,
|
||||
int (*cmp_func)(const void *, const void *));
|
||||
|
||||
public:
|
||||
|
||||
DictList();
|
||||
~DictList();
|
||||
|
||||
bool save_list(FILE *fp);
|
||||
bool load_list(FILE *fp);
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
// Init the list from the LemmaEntry array.
|
||||
// lemma_arr should have been sorted by the hanzi_str, and have been given
|
||||
// ids from 1
|
||||
bool init_list(const SingleCharItem *scis, size_t scis_num,
|
||||
const LemmaEntry *lemma_arr, size_t lemma_num);
|
||||
#endif
|
||||
|
||||
// Get the hanzi string for the given id
|
||||
uint16 get_lemma_str(LemmaIdType id_hz, char16 *str_buf, uint16 str_max);
|
||||
|
||||
void convert_to_hanzis(char16 *str, uint16 str_len);
|
||||
|
||||
void convert_to_scis_ids(char16 *str, uint16 str_len);
|
||||
|
||||
// last_hzs stores the last n Chinese characters history, its length should be
|
||||
// less or equal than kMaxPredictSize.
|
||||
// hzs_len specifies the length(<= kMaxPredictSize).
|
||||
// predict_buf is used to store the result.
|
||||
// buf_len specifies the buffer length.
|
||||
// b4_used specifies how many items before predict_buf have been used.
|
||||
// Returned value is the number of newly added items.
|
||||
size_t predict(const char16 last_hzs[], uint16 hzs_len,
|
||||
NPredictItem *npre_items, size_t npre_max,
|
||||
size_t b4_used);
|
||||
|
||||
// If half_splid is a valid half spelling id, return those full spelling
|
||||
// ids which share this half id.
|
||||
uint16 get_splids_for_hanzi(char16 hanzi, uint16 half_splid,
|
||||
uint16 *splids, uint16 max_splids);
|
||||
|
||||
LemmaIdType get_lemma_id(const char16 *str, uint16 str_len);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_DICTLIST_H__
|
|
@ -0,0 +1,941 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include "dicttrie.h"
|
||||
#include "dictbuilder.h"
|
||||
#include "lpicache.h"
|
||||
#include "mystdlib.h"
|
||||
#include "ngram.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Construct an empty trie: no buffers allocated, all counters zero, and the
// milestone / parsing-mark cursors reset to their initial positions.
DictTrie::DictTrie() {
  spl_trie_ = SpellingTrie::get_cpinstance();

  // Node storage and the first-level quick index.
  root_ = NULL;
  nodes_ge1_ = NULL;
  splid_le0_index_ = NULL;
  lma_node_num_le0_ = 0;
  lma_node_num_ge1_ = 0;

  // Lemma id buffer.
  lma_idx_buf_ = NULL;
  lma_idx_buf_len_ = 0;
  total_lma_num_ = 0;
  top_lmas_num_ = 0;

  dict_list_ = NULL;

  // Parsing state.
  parsing_marks_ = NULL;
  mile_stones_ = NULL;
  reset_milestones(0, kFirstValidMileStoneHandle);
}
|
||||
|
||||
// Release every buffer held by the trie, including the DictList object.
DictTrie::~DictTrie() {
  const bool also_free_dict_list = true;
  free_resource(also_free_dict_list);
}
|
||||
|
||||
void DictTrie::free_resource(bool free_dict_list) {
|
||||
if (NULL != root_)
|
||||
free(root_);
|
||||
root_ = NULL;
|
||||
|
||||
if (NULL != splid_le0_index_)
|
||||
free(splid_le0_index_);
|
||||
splid_le0_index_ = NULL;
|
||||
|
||||
if (NULL != nodes_ge1_)
|
||||
free(nodes_ge1_);
|
||||
nodes_ge1_ = NULL;
|
||||
|
||||
if (NULL != lma_idx_buf_)
|
||||
free(lma_idx_buf_);
|
||||
lma_idx_buf_ = NULL;
|
||||
|
||||
if (free_dict_list) {
|
||||
if (NULL != dict_list_) {
|
||||
delete dict_list_;
|
||||
}
|
||||
dict_list_ = NULL;
|
||||
}
|
||||
|
||||
if (parsing_marks_)
|
||||
delete [] parsing_marks_;
|
||||
parsing_marks_ = NULL;
|
||||
|
||||
if (mile_stones_)
|
||||
delete [] mile_stones_;
|
||||
mile_stones_ = NULL;
|
||||
|
||||
reset_milestones(0, kFirstValidMileStoneHandle);
|
||||
}
|
||||
|
||||
inline size_t DictTrie::get_son_offset(const LmaNodeGE1 *node) {
|
||||
return ((size_t)node->son_1st_off_l + ((size_t)node->son_1st_off_h << 16));
|
||||
}
|
||||
|
||||
inline size_t DictTrie::get_homo_idx_buf_offset(const LmaNodeGE1 *node) {
|
||||
return ((size_t)node->homo_idx_buf_off_l +
|
||||
((size_t)node->homo_idx_buf_off_h << 16));
|
||||
}
|
||||
|
||||
// Decode the id_offset-th lemma id stored in lma_idx_buf_.
//
// Each id occupies kLemmaIdSize consecutive bytes, least significant byte
// first; the bytes are folded from the most significant one down.
inline LemmaIdType DictTrie::get_lemma_id(size_t id_offset) {
  const unsigned char *const bytes = lma_idx_buf_ + id_offset * kLemmaIdSize;

  LemmaIdType id = 0;
  for (size_t remaining = kLemmaIdSize; remaining > 0; remaining--)
    id = (id << 8) + bytes[remaining - 1];
  return id;
}
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
bool DictTrie::build_dict(const char* fn_raw, const char* fn_validhzs) {
|
||||
DictBuilder* dict_builder = new DictBuilder();
|
||||
|
||||
free_resource(true);
|
||||
|
||||
return dict_builder->build_dict(fn_raw, fn_validhzs, this);
|
||||
}
|
||||
|
||||
bool DictTrie::save_dict(FILE *fp) {
|
||||
if (NULL == fp)
|
||||
return false;
|
||||
|
||||
if (fwrite(&lma_node_num_le0_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(&lma_node_num_ge1_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(&lma_idx_buf_len_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(&top_lmas_num_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(root_, sizeof(LmaNodeLE0), lma_node_num_le0_, fp)
|
||||
!= lma_node_num_le0_)
|
||||
return false;
|
||||
|
||||
if (fwrite(nodes_ge1_, sizeof(LmaNodeGE1), lma_node_num_ge1_, fp)
|
||||
!= lma_node_num_ge1_)
|
||||
return false;
|
||||
|
||||
if (fwrite(lma_idx_buf_, sizeof(unsigned char), lma_idx_buf_len_, fp) !=
|
||||
lma_idx_buf_len_)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DictTrie::save_dict(const char *filename) {
|
||||
if (NULL == filename)
|
||||
return false;
|
||||
|
||||
if (NULL == root_ || NULL == dict_list_)
|
||||
return false;
|
||||
|
||||
SpellingTrie &spl_trie = SpellingTrie::get_instance();
|
||||
NGram &ngram = NGram::get_instance();
|
||||
|
||||
FILE *fp = fopen(filename, "wb");
|
||||
if (NULL == fp)
|
||||
return false;
|
||||
|
||||
if (!spl_trie.save_spl_trie(fp) || !dict_list_->save_list(fp) ||
|
||||
!save_dict(fp) || !ngram.save_ngram(fp)) {
|
||||
fclose(fp);
|
||||
return false;
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
return true;
|
||||
}
|
||||
#endif // ___BUILD_MODEL___
|
||||
|
||||
bool DictTrie::load_dict(FILE *fp) {
|
||||
if (NULL == fp)
|
||||
return false;
|
||||
if (fread(&lma_node_num_le0_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fread(&lma_node_num_ge1_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fread(&lma_idx_buf_len_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fread(&top_lmas_num_, sizeof(uint32), 1, fp) != 1 ||
|
||||
top_lmas_num_ >= lma_idx_buf_len_)
|
||||
return false;
|
||||
|
||||
free_resource(false);
|
||||
|
||||
root_ = static_cast<LmaNodeLE0*>
|
||||
(malloc(lma_node_num_le0_ * sizeof(LmaNodeLE0)));
|
||||
nodes_ge1_ = static_cast<LmaNodeGE1*>
|
||||
(malloc(lma_node_num_ge1_ * sizeof(LmaNodeGE1)));
|
||||
lma_idx_buf_ = (unsigned char*)malloc(lma_idx_buf_len_);
|
||||
total_lma_num_ = lma_idx_buf_len_ / kLemmaIdSize;
|
||||
|
||||
size_t buf_size = SpellingTrie::get_instance().get_spelling_num() + 1;
|
||||
assert(lma_node_num_le0_ <= buf_size);
|
||||
splid_le0_index_ = static_cast<uint16*>(malloc(buf_size * sizeof(uint16)));
|
||||
|
||||
// Init the space for parsing.
|
||||
parsing_marks_ = new ParsingMark[kMaxParsingMark];
|
||||
mile_stones_ = new MileStone[kMaxMileStone];
|
||||
reset_milestones(0, kFirstValidMileStoneHandle);
|
||||
|
||||
if (NULL == root_ || NULL == nodes_ge1_ || NULL == lma_idx_buf_ ||
|
||||
NULL == splid_le0_index_ || NULL == parsing_marks_ ||
|
||||
NULL == mile_stones_) {
|
||||
free_resource(false);
|
||||
return false;
|
||||
}
|
||||
|
||||
if (fread(root_, sizeof(LmaNodeLE0), lma_node_num_le0_, fp)
|
||||
!= lma_node_num_le0_)
|
||||
return false;
|
||||
|
||||
if (fread(nodes_ge1_, sizeof(LmaNodeGE1), lma_node_num_ge1_, fp)
|
||||
!= lma_node_num_ge1_)
|
||||
return false;
|
||||
|
||||
if (fread(lma_idx_buf_, sizeof(unsigned char), lma_idx_buf_len_, fp) !=
|
||||
lma_idx_buf_len_)
|
||||
return false;
|
||||
|
||||
// The quick index for the first level sons
|
||||
uint16 last_splid = kFullSplIdStart;
|
||||
size_t last_pos = 0;
|
||||
for (size_t i = 1; i < lma_node_num_le0_; i++) {
|
||||
for (uint16 splid = last_splid; splid < root_[i].spl_idx; splid++)
|
||||
splid_le0_index_[splid - kFullSplIdStart] = last_pos;
|
||||
|
||||
splid_le0_index_[root_[i].spl_idx - kFullSplIdStart] =
|
||||
static_cast<uint16>(i);
|
||||
last_splid = root_[i].spl_idx;
|
||||
last_pos = i;
|
||||
}
|
||||
|
||||
for (uint16 splid = last_splid + 1;
|
||||
splid < buf_size + kFullSplIdStart; splid++) {
|
||||
assert(static_cast<size_t>(splid - kFullSplIdStart) < buf_size);
|
||||
splid_le0_index_[splid - kFullSplIdStart] = last_pos + 1;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DictTrie::load_dict(const char *filename, LemmaIdType start_id,
|
||||
LemmaIdType end_id) {
|
||||
if (NULL == filename || end_id <= start_id)
|
||||
return false;
|
||||
|
||||
FILE *fp = fopen(filename, "rb");
|
||||
if (NULL == fp)
|
||||
return false;
|
||||
|
||||
free_resource(true);
|
||||
|
||||
dict_list_ = new DictList();
|
||||
if (NULL == dict_list_) {
|
||||
fclose(fp);
|
||||
return false;
|
||||
}
|
||||
|
||||
SpellingTrie &spl_trie = SpellingTrie::get_instance();
|
||||
NGram &ngram = NGram::get_instance();
|
||||
|
||||
if (!spl_trie.load_spl_trie(fp) || !dict_list_->load_list(fp) ||
|
||||
!load_dict(fp) || !ngram.load_ngram(fp) ||
|
||||
total_lma_num_ > end_id - start_id + 1) {
|
||||
free_resource(true);
|
||||
fclose(fp);
|
||||
return false;
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
return true;
|
||||
}
|
||||
|
||||
bool DictTrie::load_dict_fd(int sys_fd, long start_offset,
|
||||
long length, LemmaIdType start_id,
|
||||
LemmaIdType end_id) {
|
||||
if (start_offset < 0 || length <= 0 || end_id <= start_id)
|
||||
return false;
|
||||
|
||||
FILE *fp = fdopen(sys_fd, "rb");
|
||||
if (NULL == fp)
|
||||
return false;
|
||||
|
||||
if (-1 == fseek(fp, start_offset, SEEK_SET)) {
|
||||
fclose(fp);
|
||||
return false;
|
||||
}
|
||||
|
||||
free_resource(true);
|
||||
|
||||
dict_list_ = new DictList();
|
||||
if (NULL == dict_list_) {
|
||||
fclose(fp);
|
||||
return false;
|
||||
}
|
||||
|
||||
SpellingTrie &spl_trie = SpellingTrie::get_instance();
|
||||
NGram &ngram = NGram::get_instance();
|
||||
|
||||
if (!spl_trie.load_spl_trie(fp) || !dict_list_->load_list(fp) ||
|
||||
!load_dict(fp) || !ngram.load_ngram(fp) ||
|
||||
ftell(fp) < start_offset + length ||
|
||||
total_lma_num_ > end_id - start_id + 1) {
|
||||
free_resource(true);
|
||||
fclose(fp);
|
||||
return false;
|
||||
}
|
||||
|
||||
fclose(fp);
|
||||
return true;
|
||||
}
|
||||
|
||||
// Fill lpi_items with the homophone lemmas attached to the first-level node.
//
// For each homophone the lemma id is decoded from the id buffer, lma_len is
// set to 1 and psb to the unigram score of that id.
//
// lpi_max is the capacity of lpi_items; nothing is written when it is 0.
// Returns the number of items written (at most lpi_max).
size_t DictTrie::fill_lpi_buffer(LmaPsbItem lpi_items[], size_t lpi_max,
                                 LmaNodeLE0 *node) {
  size_t lpi_num = 0;
  NGram& ngram = NGram::get_instance();
  const size_t homo_num = (size_t)node->num_of_homo;
  // Capacity is checked BEFORE each write so a zero-sized buffer is never
  // touched.
  for (size_t homo = 0; homo < homo_num && lpi_num < lpi_max; homo++) {
    lpi_items[lpi_num].id = get_lemma_id(node->homo_idx_buf_off +
                                         homo);
    lpi_items[lpi_num].lma_len = 1;
    lpi_items[lpi_num].psb =
        static_cast<LmaScoreType>(ngram.get_uni_psb(lpi_items[lpi_num].id));
    lpi_num++;
  }

  return lpi_num;
}
|
||||
|
||||
// Fill lpi_items with the homophone lemmas attached to a deeper-level node.
//
// homo_buf_off - offset of the node's first lemma id in the id buffer.
// node         - supplies the number of homophones.
// lma_len      - lemma length stored into every item.
//
// lpi_max is the capacity of lpi_items; nothing is written when it is 0.
// Returns the number of items written (at most lpi_max).
size_t DictTrie::fill_lpi_buffer(LmaPsbItem lpi_items[], size_t lpi_max,
                                 size_t homo_buf_off, LmaNodeGE1 *node,
                                 uint16 lma_len) {
  size_t lpi_num = 0;
  NGram& ngram = NGram::get_instance();
  const size_t homo_num = (size_t)node->num_of_homo;
  // Capacity is checked BEFORE each write so a zero-sized buffer is never
  // touched.
  for (size_t homo = 0; homo < homo_num && lpi_num < lpi_max; homo++) {
    lpi_items[lpi_num].id = get_lemma_id(homo_buf_off + homo);
    lpi_items[lpi_num].lma_len = lma_len;
    lpi_items[lpi_num].psb =
        static_cast<LmaScoreType>(ngram.get_uni_psb(lpi_items[lpi_num].id));
    lpi_num++;
  }

  return lpi_num;
}
|
||||
|
||||
// Roll back the milestone / parsing-mark cursors.
//
// from_step == 0: full reset to the initial positions.
// Otherwise: if from_handle is a valid handle below the current milestone
// cursor, that milestone and everything after it is discarded, and the
// parsing-mark cursor returns to where that milestone's marks began.
// Invalid handles are ignored.
void DictTrie::reset_milestones(uint16 from_step, MileStoneHandle from_handle) {
  if (0 == from_step) {
    parsing_marks_pos_ = 0;
    mile_stones_pos_ = kFirstValidMileStoneHandle;
    return;
  }

  const bool handle_in_use =
      from_handle > 0 && from_handle < mile_stones_pos_;
  if (!handle_in_use)
    return;

  mile_stones_pos_ = from_handle;
  parsing_marks_pos_ = mile_stones_[from_handle].mark_start;
}
|
||||
|
||||
// Extend the search by one spelling id and dispatch to the matching worker:
//   - from_handle == 0          : root -> first-level nodes (extend_dict0)
//   - dep->splids_extended == 1 : first-level -> deeper nodes (extend_dict1)
//   - otherwise                 : deeper -> deeper nodes (extend_dict2)
//
// Returns the handle produced by the worker, or 0 when dep is NULL.
MileStoneHandle DictTrie::extend_dict(MileStoneHandle from_handle,
                                      const DictExtPara *dep,
                                      LmaPsbItem *lpi_items, size_t lpi_max,
                                      size_t *lpi_num) {
  if (NULL == dep)
    return 0;

  if (0 == from_handle) {
    // Starting from the root: nothing may have been extended yet.
    assert(0 == dep->splids_extended);
    return extend_dict0(from_handle, dep, lpi_items, lpi_max, lpi_num);
  }

  return (1 == dep->splids_extended)
      ? extend_dict1(from_handle, dep, lpi_items, lpi_max, lpi_num)
      : extend_dict2(from_handle, dep, lpi_items, lpi_max, lpi_num);
}
|
||||
|
||||
// Extend from the root to the first-level nodes whose spelling id lies in
// [dep->id_start, dep->id_start + dep->id_num).
//
// *lpi_num is reset to 0. Unless the LpiCache reports the spelling id as
// cached, lemma items of the matching nodes are appended to lpi_items (for a
// half id that is a yunmu, only the first matching node contributes).
// If a node with spl_idx == id_start exists and there is room, one parsing
// mark covering id_num nodes and one milestone are recorded.
//
// Returns the new milestone handle, or 0 if none was created.
MileStoneHandle DictTrie::extend_dict0(MileStoneHandle from_handle,
                                       const DictExtPara *dep,
                                       LmaPsbItem *lpi_items,
                                       size_t lpi_max, size_t *lpi_num) {
  assert(NULL != dep && 0 == from_handle);
  *lpi_num = 0;
  MileStoneHandle new_handle = 0;

  const uint16 splid = dep->splids[dep->splids_extended];
  const uint16 id_start = dep->id_start;
  const uint16 id_num = dep->id_num;

  const bool cached = LpiCache::get_instance().is_cached(splid);

  // First-level nodes for the id range, taken from the quick index.
  const size_t first_son = splid_le0_index_[id_start - kFullSplIdStart];
  const size_t sons_end =
      splid_le0_index_[id_start + id_num - kFullSplIdStart];

  for (size_t son_pos = first_son; son_pos < sons_end; son_pos++) {
    assert(1 == root_->son_1st_off);
    LmaNodeLE0 *son = root_ + son_pos;
    assert(son->spl_idx >= id_start && son->spl_idx < id_start + id_num);

    // Collect lemma items, skipping non-first nodes of a yunmu half id.
    if (!cached && *lpi_num < lpi_max &&
        !(spl_trie_->is_half_id_yunmu(splid) && son_pos != first_son)) {
      *lpi_num += fill_lpi_buffer(lpi_items + (*lpi_num),
                                  lpi_max - *lpi_num, son);
    }

    // If necessary, fill in a new mile stone.
    if (son->spl_idx == id_start &&
        mile_stones_pos_ < kMaxMileStone &&
        parsing_marks_pos_ < kMaxParsingMark) {
      ParsingMark &mark = parsing_marks_[parsing_marks_pos_];
      mark.node_offset = son_pos;
      mark.node_num = id_num;

      MileStone &stone = mile_stones_[mile_stones_pos_];
      stone.mark_start = parsing_marks_pos_;
      stone.mark_num = 1;

      new_handle = mile_stones_pos_;
      parsing_marks_pos_++;
      mile_stones_pos_++;
    }

    // The last id of the range has been handled.
    if (son->spl_idx >= id_start + id_num - 1)
      break;
  }

  return new_handle;
}
|
||||
|
||||
// Extend from first-level nodes (LmaNodeLE0) to their sons (LmaNodeGE1).
//
// For every first-level node referenced by the milestone from_handle, the
// sons whose spelling id lies in [dep->id_start, dep->id_start + dep->id_num)
// are located. Their lemma items (lemma length 2) are appended to lpi_items
// while there is room, and each run of matching sons is recorded as one
// parsing mark. All marks recorded by this call form one new milestone.
//
// Returns the handle of the new milestone, or 0 if no mark was recorded
// (nothing matched, or the mark / milestone buffers are full).
MileStoneHandle DictTrie::extend_dict1(MileStoneHandle from_handle,
                                       const DictExtPara *dep,
                                       LmaPsbItem *lpi_items,
                                       size_t lpi_max, size_t *lpi_num) {
  assert(NULL != dep && from_handle > 0 && from_handle < mile_stones_pos_);

  MileStoneHandle ret_handle = 0;

  // Number of parsing marks actually stored for the new milestone.
  size_t ret_val = 0;

  uint16 id_start = dep->id_start;
  uint16 id_num = dep->id_num;

  // Begin extending.
  MileStone *mile_stone = mile_stones_ + from_handle;

  for (uint16 h_pos = 0; h_pos < mile_stone->mark_num; h_pos++) {
    ParsingMark p_mark = parsing_marks_[mile_stone->mark_start + h_pos];
    uint16 ext_num = p_mark.node_num;
    for (uint16 ext_pos = 0; ext_pos < ext_num; ext_pos++) {
      LmaNodeLE0 *node = root_ + p_mark.node_offset + ext_pos;
      size_t found_start = 0;
      size_t found_num = 0;
      for (size_t son_pos = 0; son_pos < (size_t)node->num_of_son; son_pos++) {
        assert(node->son_1st_off <= lma_node_num_ge1_);
        LmaNodeGE1 *son = nodes_ge1_ + node->son_1st_off + son_pos;
        if (son->spl_idx >= id_start
            && son->spl_idx < id_start + id_num) {
          if (*lpi_num < lpi_max) {
            size_t homo_buf_off = get_homo_idx_buf_offset(son);
            *lpi_num += fill_lpi_buffer(lpi_items + (*lpi_num),
                                        lpi_max - *lpi_num, homo_buf_off, son,
                                        2);
          }

          // Remember where the run of matching sons starts.
          if (0 == found_num) {
            found_start = son_pos;
          }
          found_num++;
        }
        // Past the end of the id range, or no more sons: close the run.
        if (son->spl_idx >= id_start + id_num - 1 || son_pos ==
            (size_t)node->num_of_son - 1) {
          if (found_num > 0) {
            if (mile_stones_pos_ < kMaxMileStone &&
                parsing_marks_pos_ < kMaxParsingMark) {
              parsing_marks_[parsing_marks_pos_].node_offset =
                node->son_1st_off + found_start;
              parsing_marks_[parsing_marks_pos_].node_num = found_num;
              if (0 == ret_val)
                mile_stones_[mile_stones_pos_].mark_start =
                  parsing_marks_pos_;
              parsing_marks_pos_++;
              // Count only marks that were really stored. Counting dropped
              // marks would make mark_num cover slots that were never
              // written and would create a milestone even when the milestone
              // buffer is already full.
              ret_val++;
            }
          }
          break;
        }
      }  // for son_pos
    }  // for ext_pos
  }  // for h_pos

  // ret_val > 0 implies the capacity check above passed, so the milestone
  // slot is in range.
  if (ret_val > 0) {
    mile_stones_[mile_stones_pos_].mark_num = ret_val;
    ret_handle = mile_stones_pos_;
    mile_stones_pos_++;
  }

  return ret_handle;
}
|
||||
|
||||
// Extend from deeper nodes (LmaNodeGE1) to their sons (LmaNodeGE1).
//
// For every node referenced by the milestone from_handle, the sons whose
// spelling id lies in [dep->id_start, dep->id_start + dep->id_num) are
// located. Their lemma items (lemma length dep->splids_extended + 1) are
// appended to lpi_items while there is room, and each run of matching sons is
// recorded as one parsing mark. All marks recorded by this call form one new
// milestone.
//
// Returns the handle of the new milestone, or 0 if no mark was recorded
// (nothing matched, or the mark / milestone buffers are full).
MileStoneHandle DictTrie::extend_dict2(MileStoneHandle from_handle,
                                       const DictExtPara *dep,
                                       LmaPsbItem *lpi_items,
                                       size_t lpi_max, size_t *lpi_num) {
  assert(NULL != dep && from_handle > 0 && from_handle < mile_stones_pos_);

  MileStoneHandle ret_handle = 0;

  // Number of parsing marks actually stored for the new milestone.
  size_t ret_val = 0;

  uint16 id_start = dep->id_start;
  uint16 id_num = dep->id_num;

  // Begin extending.
  MileStone *mile_stone = mile_stones_ + from_handle;

  for (uint16 h_pos = 0; h_pos < mile_stone->mark_num; h_pos++) {
    ParsingMark p_mark = parsing_marks_[mile_stone->mark_start + h_pos];
    uint16 ext_num = p_mark.node_num;
    for (uint16 ext_pos = 0; ext_pos < ext_num; ext_pos++) {
      LmaNodeGE1 *node = nodes_ge1_ + p_mark.node_offset + ext_pos;
      size_t found_start = 0;
      size_t found_num = 0;

      for (size_t son_pos = 0; son_pos < (size_t)node->num_of_son; son_pos++) {
        assert(node->son_1st_off_l > 0 || node->son_1st_off_h > 0);
        LmaNodeGE1 *son = nodes_ge1_ + get_son_offset(node) + son_pos;
        if (son->spl_idx >= id_start
            && son->spl_idx < id_start + id_num) {
          if (*lpi_num < lpi_max) {
            size_t homo_buf_off = get_homo_idx_buf_offset(son);
            *lpi_num += fill_lpi_buffer(lpi_items + (*lpi_num),
                                        lpi_max - *lpi_num, homo_buf_off, son,
                                        dep->splids_extended + 1);
          }

          // Remember where the run of matching sons starts.
          if (0 == found_num) {
            found_start = son_pos;
          }
          found_num++;
        }
        // Past the end of the id range, or no more sons: close the run.
        if (son->spl_idx >= id_start + id_num - 1 || son_pos ==
            (size_t)node->num_of_son - 1) {
          if (found_num > 0) {
            if (mile_stones_pos_ < kMaxMileStone &&
                parsing_marks_pos_ < kMaxParsingMark) {
              parsing_marks_[parsing_marks_pos_].node_offset =
                get_son_offset(node) + found_start;
              parsing_marks_[parsing_marks_pos_].node_num = found_num;
              if (0 == ret_val)
                mile_stones_[mile_stones_pos_].mark_start =
                  parsing_marks_pos_;
              parsing_marks_pos_++;
              // Count only marks that were really stored. Counting dropped
              // marks would make mark_num cover slots that were never
              // written and would create a milestone even when the milestone
              // buffer is already full.
              ret_val++;
            }
          }
          break;
        }
      }  // for son_pos
    }  // for ext_pos
  }  // for h_pos

  // ret_val > 0 implies the capacity check above passed, so the milestone
  // slot is in range.
  if (ret_val > 0) {
    mile_stones_[mile_stones_pos_].mark_num = ret_val;
    ret_handle = mile_stones_pos_;
    mile_stones_pos_++;
  }

  return ret_handle;
}
|
||||
|
||||
bool DictTrie::try_extend(const uint16 *splids, uint16 splid_num,
|
||||
LemmaIdType id_lemma) {
|
||||
if (0 == splid_num || NULL == splids)
|
||||
return false;
|
||||
|
||||
void *node = root_ + splid_le0_index_[splids[0] - kFullSplIdStart];
|
||||
|
||||
for (uint16 pos = 1; pos < splid_num; pos++) {
|
||||
if (1 == pos) {
|
||||
LmaNodeLE0 *node_le0 = reinterpret_cast<LmaNodeLE0*>(node);
|
||||
LmaNodeGE1 *node_son;
|
||||
uint16 son_pos;
|
||||
for (son_pos = 0; son_pos < static_cast<uint16>(node_le0->num_of_son);
|
||||
son_pos++) {
|
||||
assert(node_le0->son_1st_off <= lma_node_num_ge1_);
|
||||
node_son = nodes_ge1_ + node_le0->son_1st_off
|
||||
+ son_pos;
|
||||
if (node_son->spl_idx == splids[pos])
|
||||
break;
|
||||
}
|
||||
if (son_pos < node_le0->num_of_son)
|
||||
node = reinterpret_cast<void*>(node_son);
|
||||
else
|
||||
return false;
|
||||
} else {
|
||||
LmaNodeGE1 *node_ge1 = reinterpret_cast<LmaNodeGE1*>(node);
|
||||
LmaNodeGE1 *node_son;
|
||||
uint16 son_pos;
|
||||
for (son_pos = 0; son_pos < static_cast<uint16>(node_ge1->num_of_son);
|
||||
son_pos++) {
|
||||
assert(node_ge1->son_1st_off_l > 0 || node_ge1->son_1st_off_h > 0);
|
||||
node_son = nodes_ge1_ + get_son_offset(node_ge1) + son_pos;
|
||||
if (node_son->spl_idx == splids[pos])
|
||||
break;
|
||||
}
|
||||
if (son_pos < node_ge1->num_of_son)
|
||||
node = reinterpret_cast<void*>(node_son);
|
||||
else
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
if (1 == splid_num) {
|
||||
LmaNodeLE0* node_le0 = reinterpret_cast<LmaNodeLE0*>(node);
|
||||
size_t num_of_homo = (size_t)node_le0->num_of_homo;
|
||||
for (size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++) {
|
||||
LemmaIdType id_this = get_lemma_id(node_le0->homo_idx_buf_off + homo_pos);
|
||||
char16 str[2];
|
||||
get_lemma_str(id_this, str, 2);
|
||||
if (id_this == id_lemma)
|
||||
return true;
|
||||
}
|
||||
} else {
|
||||
LmaNodeGE1* node_ge1 = reinterpret_cast<LmaNodeGE1*>(node);
|
||||
size_t num_of_homo = (size_t)node_ge1->num_of_homo;
|
||||
for (size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++) {
|
||||
size_t node_homo_off = get_homo_idx_buf_offset(node_ge1);
|
||||
if (get_lemma_id(node_homo_off + homo_pos) == id_lemma)
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
size_t DictTrie::get_lpis(const uint16* splid_str, uint16 splid_str_len,
|
||||
LmaPsbItem* lma_buf, size_t max_lma_buf) {
|
||||
if (splid_str_len > kMaxLemmaSize)
|
||||
return 0;
|
||||
|
||||
#define MAX_EXTENDBUF_LEN 200
|
||||
|
||||
size_t* node_buf1[MAX_EXTENDBUF_LEN]; // use size_t for data alignment
|
||||
size_t* node_buf2[MAX_EXTENDBUF_LEN];
|
||||
LmaNodeLE0** node_fr_le0 =
|
||||
reinterpret_cast<LmaNodeLE0**>(node_buf1); // Nodes from.
|
||||
LmaNodeLE0** node_to_le0 =
|
||||
reinterpret_cast<LmaNodeLE0**>(node_buf2); // Nodes to.
|
||||
LmaNodeGE1** node_fr_ge1 = NULL;
|
||||
LmaNodeGE1** node_to_ge1 = NULL;
|
||||
size_t node_fr_num = 1;
|
||||
size_t node_to_num = 0;
|
||||
node_fr_le0[0] = root_;
|
||||
if (NULL == node_fr_le0[0])
|
||||
return 0;
|
||||
|
||||
size_t spl_pos = 0;
|
||||
|
||||
while (spl_pos < splid_str_len) {
|
||||
uint16 id_num = 1;
|
||||
uint16 id_start = splid_str[spl_pos];
|
||||
// If it is a half id
|
||||
if (spl_trie_->is_half_id(splid_str[spl_pos])) {
|
||||
id_num = spl_trie_->half_to_full(splid_str[spl_pos], &id_start);
|
||||
assert(id_num > 0);
|
||||
}
|
||||
|
||||
// Extend the nodes
|
||||
if (0 == spl_pos) { // From LmaNodeLE0 (root) to LmaNodeLE0 nodes
|
||||
for (size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++) {
|
||||
LmaNodeLE0 *node = node_fr_le0[node_fr_pos];
|
||||
assert(node == root_ && 1 == node_fr_num);
|
||||
size_t son_start = splid_le0_index_[id_start - kFullSplIdStart];
|
||||
size_t son_end =
|
||||
splid_le0_index_[id_start + id_num - kFullSplIdStart];
|
||||
for (size_t son_pos = son_start; son_pos < son_end; son_pos++) {
|
||||
assert(1 == node->son_1st_off);
|
||||
LmaNodeLE0 *node_son = root_ + son_pos;
|
||||
assert(node_son->spl_idx >= id_start
|
||||
&& node_son->spl_idx < id_start + id_num);
|
||||
if (node_to_num < MAX_EXTENDBUF_LEN) {
|
||||
node_to_le0[node_to_num] = node_son;
|
||||
node_to_num++;
|
||||
}
|
||||
// id_start + id_num - 1 is the last one, which has just been
|
||||
// recorded.
|
||||
if (node_son->spl_idx >= id_start + id_num - 1)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
spl_pos++;
|
||||
if (spl_pos >= splid_str_len || node_to_num == 0)
|
||||
break;
|
||||
// Prepare the nodes for next extending
|
||||
// next time, from LmaNodeLE0 to LmaNodeGE1
|
||||
LmaNodeLE0** node_tmp = node_fr_le0;
|
||||
node_fr_le0 = node_to_le0;
|
||||
node_to_le0 = NULL;
|
||||
node_to_ge1 = reinterpret_cast<LmaNodeGE1**>(node_tmp);
|
||||
} else if (1 == spl_pos) { // From LmaNodeLE0 to LmaNodeGE1 nodes
|
||||
for (size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++) {
|
||||
LmaNodeLE0 *node = node_fr_le0[node_fr_pos];
|
||||
for (size_t son_pos = 0; son_pos < (size_t)node->num_of_son;
|
||||
son_pos++) {
|
||||
assert(node->son_1st_off <= lma_node_num_ge1_);
|
||||
LmaNodeGE1 *node_son = nodes_ge1_ + node->son_1st_off
|
||||
+ son_pos;
|
||||
if (node_son->spl_idx >= id_start
|
||||
&& node_son->spl_idx < id_start + id_num) {
|
||||
if (node_to_num < MAX_EXTENDBUF_LEN) {
|
||||
node_to_ge1[node_to_num] = node_son;
|
||||
node_to_num++;
|
||||
}
|
||||
}
|
||||
// id_start + id_num - 1 is the last one, which has just been
|
||||
// recorded.
|
||||
if (node_son->spl_idx >= id_start + id_num - 1)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
spl_pos++;
|
||||
if (spl_pos >= splid_str_len || node_to_num == 0)
|
||||
break;
|
||||
// Prepare the nodes for next extending
|
||||
// next time, from LmaNodeGE1 to LmaNodeGE1
|
||||
node_fr_ge1 = node_to_ge1;
|
||||
node_to_ge1 = reinterpret_cast<LmaNodeGE1**>(node_fr_le0);
|
||||
node_fr_le0 = NULL;
|
||||
node_to_le0 = NULL;
|
||||
} else { // From LmaNodeGE1 to LmaNodeGE1 nodes
|
||||
for (size_t node_fr_pos = 0; node_fr_pos < node_fr_num; node_fr_pos++) {
|
||||
LmaNodeGE1 *node = node_fr_ge1[node_fr_pos];
|
||||
for (size_t son_pos = 0; son_pos < (size_t)node->num_of_son;
|
||||
son_pos++) {
|
||||
assert(node->son_1st_off_l > 0 || node->son_1st_off_h > 0);
|
||||
LmaNodeGE1 *node_son = nodes_ge1_
|
||||
+ get_son_offset(node) + son_pos;
|
||||
if (node_son->spl_idx >= id_start
|
||||
&& node_son->spl_idx < id_start + id_num) {
|
||||
if (node_to_num < MAX_EXTENDBUF_LEN) {
|
||||
node_to_ge1[node_to_num] = node_son;
|
||||
node_to_num++;
|
||||
}
|
||||
}
|
||||
// id_start + id_num - 1 is the last one, which has just been
|
||||
// recorded.
|
||||
if (node_son->spl_idx >= id_start + id_num - 1)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
spl_pos++;
|
||||
if (spl_pos >= splid_str_len || node_to_num == 0)
|
||||
break;
|
||||
// Prepare the nodes for next extending
|
||||
// next time, from LmaNodeGE1 to LmaNodeGE1
|
||||
LmaNodeGE1 **node_tmp = node_fr_ge1;
|
||||
node_fr_ge1 = node_to_ge1;
|
||||
node_to_ge1 = node_tmp;
|
||||
}
|
||||
|
||||
// The number of node for next extending
|
||||
node_fr_num = node_to_num;
|
||||
node_to_num = 0;
|
||||
} // while
|
||||
|
||||
if (0 == node_to_num)
|
||||
return 0;
|
||||
|
||||
NGram &ngram = NGram::get_instance();
|
||||
size_t lma_num = 0;
|
||||
|
||||
// If the length is 1, and the splid is a one-char Yunmu like 'a', 'o', 'e',
|
||||
// only those candidates for the full matched one-char id will be returned.
|
||||
if (1 == splid_str_len && spl_trie_->is_half_id_yunmu(splid_str[0]))
|
||||
node_to_num = node_to_num > 0 ? 1 : 0;
|
||||
|
||||
for (size_t node_pos = 0; node_pos < node_to_num; node_pos++) {
|
||||
size_t num_of_homo = 0;
|
||||
if (spl_pos <= 1) { // Get from LmaNodeLE0 nodes
|
||||
LmaNodeLE0* node_le0 = node_to_le0[node_pos];
|
||||
num_of_homo = (size_t)node_le0->num_of_homo;
|
||||
for (size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++) {
|
||||
size_t ch_pos = lma_num + homo_pos;
|
||||
lma_buf[ch_pos].id =
|
||||
get_lemma_id(node_le0->homo_idx_buf_off + homo_pos);
|
||||
lma_buf[ch_pos].lma_len = 1;
|
||||
lma_buf[ch_pos].psb =
|
||||
static_cast<LmaScoreType>(ngram.get_uni_psb(lma_buf[ch_pos].id));
|
||||
|
||||
if (lma_num + homo_pos >= max_lma_buf - 1)
|
||||
break;
|
||||
}
|
||||
} else { // Get from LmaNodeGE1 nodes
|
||||
LmaNodeGE1* node_ge1 = node_to_ge1[node_pos];
|
||||
num_of_homo = (size_t)node_ge1->num_of_homo;
|
||||
for (size_t homo_pos = 0; homo_pos < num_of_homo; homo_pos++) {
|
||||
size_t ch_pos = lma_num + homo_pos;
|
||||
size_t node_homo_off = get_homo_idx_buf_offset(node_ge1);
|
||||
lma_buf[ch_pos].id = get_lemma_id(node_homo_off + homo_pos);
|
||||
lma_buf[ch_pos].lma_len = splid_str_len;
|
||||
lma_buf[ch_pos].psb =
|
||||
static_cast<LmaScoreType>(ngram.get_uni_psb(lma_buf[ch_pos].id));
|
||||
|
||||
if (lma_num + homo_pos >= max_lma_buf - 1)
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
lma_num += num_of_homo;
|
||||
if (lma_num >= max_lma_buf) {
|
||||
lma_num = max_lma_buf;
|
||||
break;
|
||||
}
|
||||
}
|
||||
return lma_num;
|
||||
}
|
||||
|
||||
// Forwards to DictList::get_lemma_str(): fetches the string of lemma
// id_lemma into str_buf (capacity str_max) and returns that call's result.
uint16 DictTrie::get_lemma_str(LemmaIdType id_lemma, char16 *str_buf,
                               uint16 str_max) {
  const uint16 result = dict_list_->get_lemma_str(id_lemma, str_buf, str_max);
  return result;
}
|
||||
|
||||
// Finds a spelling id sequence for lemma id_lemma and stores it in splids.
//
// For every character of the lemma a list of candidate spelling ids is
// gathered in spl_mtrx (spl_start[pos] .. spl_start[pos + 1] delimit the
// candidates of character pos). Every combination of candidates is then
// written into splids and checked with try_extend() until one of them leads
// to id_lemma.
//
// id_lemma    Lemma to look up.
// splids      Output buffer. When arg_valid is true it also carries input:
//             a full id is taken as is, any other value is handed to
//             DictList::get_splids_for_hanzi().
// splids_max  Capacity of splids.
// arg_valid   Whether splids holds input ids on entry.
//
// Returns the lemma length on success, or 0 if no combination matches or
// splids is too small for the lemma.
uint16 DictTrie::get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
                                  uint16 splids_max, bool arg_valid) {
  char16 lma_str[kMaxLemmaSize + 1];
  uint16 lma_len = get_lemma_str(id_lemma, lma_str, kMaxLemmaSize + 1);
  assert((!arg_valid && splids_max >= lma_len) || lma_len == splids_max);

  // The assert disappears when NDEBUG is defined; never read or write
  // beyond the caller's buffer in that case.
  if (lma_len > splids_max)
    return 0;

  uint16 spl_mtrx[kMaxLemmaSize * 5];
  uint16 spl_start[kMaxLemmaSize + 1];
  spl_start[0] = 0;
  // Number of candidate combinations. Kept in size_t because the product of
  // the per-character candidate counts can exceed the range of uint16.
  size_t try_num = 1;

  for (uint16 pos = 0; pos < lma_len; pos++) {
    uint16 cand_splids_this = 0;
    if (arg_valid && spl_trie_->is_full_id(splids[pos])) {
      spl_mtrx[spl_start[pos]] = splids[pos];
      cand_splids_this = 1;
    } else {
      cand_splids_this = dict_list_->get_splids_for_hanzi(lma_str[pos],
          arg_valid ? splids[pos] : 0, spl_mtrx + spl_start[pos],
          kMaxLemmaSize * 5 - spl_start[pos]);
      assert(cand_splids_this > 0);
    }
    spl_start[pos + 1] = spl_start[pos] + cand_splids_this;
    try_num *= cand_splids_this;
  }

  for (size_t try_pos = 0; try_pos < try_num; try_pos++) {
    // Decode try_pos as a mixed-radix number: digit pos selects the
    // candidate used for character pos.
    size_t mod = 1;
    for (uint16 pos = 0; pos < lma_len; pos++) {
      const uint16 radix = spl_start[pos + 1] - spl_start[pos];
      splids[pos] = spl_mtrx[spl_start[pos] + try_pos / mod % radix];
      mod *= radix;
    }

    if (try_extend(splids, lma_len, id_lemma))
      return lma_len;
  }

  return 0;
}
|
||||
|
||||
void DictTrie::set_total_lemma_count_of_others(size_t count) {
|
||||
NGram& ngram = NGram::get_instance();
|
||||
ngram.set_total_freq_none_sys(count);
|
||||
}
|
||||
|
||||
// Forwards to DictList::convert_to_hanzis() for the str_len items in str.
void DictTrie::convert_to_hanzis(char16 *str, uint16 str_len) {
  dict_list_->convert_to_hanzis(str, str_len);
}
|
||||
|
||||
// Forwards to DictList::convert_to_scis_ids() for the str_len items in str.
void DictTrie::convert_to_scis_ids(char16 *str, uint16 str_len) {
  dict_list_->convert_to_scis_ids(str, str_len);
}
|
||||
|
||||
// Looks up the id of the lemma spelled by lemma_str (lemma_len characters)
// through DictList::get_lemma_id(). Returns 0 when lemma_str is NULL or
// lemma_len exceeds kMaxLemmaSize.
LemmaIdType DictTrie::get_lemma_id(const char16 lemma_str[], uint16 lemma_len) {
  if (NULL != lemma_str && lemma_len <= kMaxLemmaSize)
    return dict_list_->get_lemma_id(lemma_str, lemma_len);

  return 0;
}
|
||||
|
||||
size_t DictTrie::predict_top_lmas(size_t his_len, NPredictItem *npre_items,
|
||||
size_t npre_max, size_t b4_used) {
|
||||
NGram &ngram = NGram::get_instance();
|
||||
|
||||
size_t item_num = 0;
|
||||
size_t top_lmas_id_offset = lma_idx_buf_len_ / kLemmaIdSize - top_lmas_num_;
|
||||
size_t top_lmas_pos = 0;
|
||||
while (item_num < npre_max && top_lmas_pos < top_lmas_num_) {
|
||||
memset(npre_items + item_num, 0, sizeof(NPredictItem));
|
||||
LemmaIdType top_lma_id = get_lemma_id(top_lmas_id_offset + top_lmas_pos);
|
||||
top_lmas_pos += 1;
|
||||
if (dict_list_->get_lemma_str(top_lma_id,
|
||||
npre_items[item_num].pre_hzs,
|
||||
kMaxLemmaSize - 1) == 0) {
|
||||
continue;
|
||||
}
|
||||
npre_items[item_num].psb = ngram.get_uni_psb(top_lma_id);
|
||||
npre_items[item_num].his_len = his_len;
|
||||
item_num++;
|
||||
}
|
||||
return item_num;
|
||||
}
|
||||
|
||||
size_t DictTrie::predict(const char16 *last_hzs, uint16 hzs_len,
|
||||
NPredictItem *npre_items, size_t npre_max,
|
||||
size_t b4_used) {
|
||||
return dict_list_->predict(last_hzs, hzs_len, npre_items, npre_max, b4_used);
|
||||
}
|
||||
} // namespace ime_pinyin
|
|
@ -0,0 +1,233 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_DICTTRIE_H__
|
||||
#define PINYINIME_INCLUDE_DICTTRIE_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./atomdictbase.h"
|
||||
#include "./dictdef.h"
|
||||
#include "./dictlist.h"
|
||||
#include "./searchutility.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
class DictTrie : AtomDictBase {
|
||||
private:
|
||||
struct ParsingMark {
|
||||
size_t node_offset:24;
|
||||
size_t node_num:8; // Number of nodes with this spelling id given
|
||||
// by spl_id. If spl_id is a Shengmu, for nodes
|
||||
// in the first layer of DictTrie, it equals to
|
||||
// SpellingTrie::shm2full_num(); but for those
|
||||
// nodes which are not in the first layer,
|
||||
// node_num < SpellingTrie::shm2full_num().
|
||||
// For a full spelling id, node_num = 1;
|
||||
};
|
||||
|
||||
// Used to indicate an extended mile stone.
|
||||
// An extended mile stone is used to mark a partial match in the dictionary
|
||||
// trie to speed up further potential extending.
|
||||
// For example, when the user inputs "w", a mile stone is created to mark the
|
||||
// partial match status, so that when user inputs another char 'm', it will be
|
||||
// faster to extend search space based on this mile stone.
|
||||
//
|
||||
// For partial match status of "wm", there can be more than one sub mile
|
||||
// stone, for example, "wm" can be matched to "wanm", "wom", ..., etc, so
|
||||
// there may be more one parsing mark used to mark these partial matchings.
|
||||
// A mile stone records the starting position in the mark list and number of
|
||||
// marks.
|
||||
struct MileStone {
|
||||
uint16 mark_start;
|
||||
uint16 mark_num;
|
||||
};
|
||||
|
||||
DictList* dict_list_;
|
||||
|
||||
const SpellingTrie *spl_trie_;
|
||||
|
||||
LmaNodeLE0* root_; // Nodes for root and the first layer.
|
||||
LmaNodeGE1* nodes_ge1_; // Nodes for other layers.
|
||||
|
||||
// An quick index from spelling id to the LmaNodeLE0 node buffer, or
|
||||
// to the root_ buffer.
|
||||
// Index length:
|
||||
// SpellingTrie::get_instance().get_spelling_num() + 1. The last one is used
|
||||
// to get the end.
|
||||
// All Shengmu ids are not indexed because they will be converted into
|
||||
// corresponding full ids.
|
||||
// So, given an id splid, the son is:
|
||||
// root_[splid_le0_index_[splid - kFullSplIdStart]]
|
||||
uint16 *splid_le0_index_;
|
||||
|
||||
uint32 lma_node_num_le0_;
|
||||
uint32 lma_node_num_ge1_;
|
||||
|
||||
// The first part is for homophnies, and the last top_lma_num_ items are
|
||||
// lemmas with highest scores.
|
||||
unsigned char *lma_idx_buf_;
|
||||
uint32 lma_idx_buf_len_; // The total size of lma_idx_buf_ in byte.
|
||||
uint32 total_lma_num_; // Total number of lemmas in this dictionary.
|
||||
uint32 top_lmas_num_; // Number of lemma with highest scores.
|
||||
|
||||
// Parsing mark list used to mark the detailed extended statuses.
|
||||
ParsingMark *parsing_marks_;
|
||||
// The position for next available mark.
|
||||
uint16 parsing_marks_pos_;
|
||||
|
||||
// Mile stone list used to mark the extended status.
|
||||
MileStone *mile_stones_;
|
||||
// The position for the next available mile stone. We use positions (except 0)
|
||||
// as handles.
|
||||
MileStoneHandle mile_stones_pos_;
|
||||
|
||||
// Get the offset of sons for a node.
|
||||
inline size_t get_son_offset(const LmaNodeGE1 *node);
|
||||
|
||||
// Get the offset of homonious ids for a node.
|
||||
inline size_t get_homo_idx_buf_offset(const LmaNodeGE1 *node);
|
||||
|
||||
// Get the lemma id by the offset.
|
||||
inline LemmaIdType get_lemma_id(size_t id_offset);
|
||||
|
||||
void free_resource(bool free_dict_list);
|
||||
|
||||
bool load_dict(FILE *fp);
|
||||
|
||||
// Given a LmaNodeLE0 node, extract the lemmas specified by it, and fill
|
||||
// them into the lpi_items buffer.
|
||||
// This function is called by the search engine.
|
||||
size_t fill_lpi_buffer(LmaPsbItem lpi_items[], size_t max_size,
|
||||
LmaNodeLE0 *node);
|
||||
|
||||
// Given a LmaNodeGE1 node, extract the lemmas specified by it, and fill
|
||||
// them into the lpi_items buffer.
|
||||
// This function is called by inner functions extend_dict0(), extend_dict1()
|
||||
// and extend_dict2().
|
||||
size_t fill_lpi_buffer(LmaPsbItem lpi_items[], size_t max_size,
|
||||
size_t homo_buf_off, LmaNodeGE1 *node,
|
||||
uint16 lma_len);
|
||||
|
||||
// Extend in the trie from level 0.
|
||||
MileStoneHandle extend_dict0(MileStoneHandle from_handle,
|
||||
const DictExtPara *dep, LmaPsbItem *lpi_items,
|
||||
size_t lpi_max, size_t *lpi_num);
|
||||
|
||||
// Extend in the trie from level 1.
|
||||
MileStoneHandle extend_dict1(MileStoneHandle from_handle,
|
||||
const DictExtPara *dep, LmaPsbItem *lpi_items,
|
||||
size_t lpi_max, size_t *lpi_num);
|
||||
|
||||
// Extend in the trie from level 2.
|
||||
MileStoneHandle extend_dict2(MileStoneHandle from_handle,
|
||||
const DictExtPara *dep, LmaPsbItem *lpi_items,
|
||||
size_t lpi_max, size_t *lpi_num);
|
||||
|
||||
// Try to extend the given spelling id buffer, and if the given id_lemma can
|
||||
// be successfully gotten, return true;
|
||||
// The given spelling ids are all valid full ids.
|
||||
bool try_extend(const uint16 *splids, uint16 splid_num, LemmaIdType id_lemma);
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
bool save_dict(FILE *fp);
|
||||
#endif // ___BUILD_MODEL___
|
||||
|
||||
static const int kMaxMileStone = 100;
|
||||
static const int kMaxParsingMark = 600;
|
||||
static const MileStoneHandle kFirstValidMileStoneHandle = 1;
|
||||
|
||||
friend class DictParser;
|
||||
friend class DictBuilder;
|
||||
|
||||
public:
|
||||
|
||||
DictTrie();
|
||||
~DictTrie();
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
// Construct the tree from the file fn_raw.
|
||||
// fn_validhzs provide the valid hanzi list. If fn_validhzs is
|
||||
// NULL, only chars in GB2312 will be included.
|
||||
bool build_dict(const char *fn_raw, const char *fn_validhzs);
|
||||
|
||||
// Save the binary dictionary
|
||||
// Actually, the SpellingTrie/DictList instance will be also saved.
|
||||
bool save_dict(const char *filename);
|
||||
#endif // ___BUILD_MODEL___
|
||||
|
||||
void convert_to_hanzis(char16 *str, uint16 str_len);
|
||||
|
||||
void convert_to_scis_ids(char16 *str, uint16 str_len);
|
||||
|
||||
// Load a binary dictionary
|
||||
// The SpellingTrie instance/DictList will be also loaded
|
||||
bool load_dict(const char *filename, LemmaIdType start_id,
|
||||
LemmaIdType end_id);
|
||||
bool load_dict_fd(int sys_fd, long start_offset, long length,
|
||||
LemmaIdType start_id, LemmaIdType end_id);
|
||||
bool close_dict() {return true;}
|
||||
size_t number_of_lemmas() {return 0;}
|
||||
|
||||
void reset_milestones(uint16 from_step, MileStoneHandle from_handle);
|
||||
|
||||
MileStoneHandle extend_dict(MileStoneHandle from_handle,
|
||||
const DictExtPara *dep,
|
||||
LmaPsbItem *lpi_items,
|
||||
size_t lpi_max, size_t *lpi_num);
|
||||
|
||||
size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len,
|
||||
LmaPsbItem *lpi_items, size_t lpi_max);
|
||||
|
||||
uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max);
|
||||
|
||||
uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
|
||||
uint16 splids_max, bool arg_valid);
|
||||
|
||||
size_t predict(const char16 *last_hzs, uint16 hzs_len,
|
||||
NPredictItem *npre_items, size_t npre_max,
|
||||
size_t b4_used);
|
||||
|
||||
LemmaIdType put_lemma(char16 /*lemma_str*/[], uint16 /*splids*/[],
|
||||
uint16 /*lemma_len*/, uint16 /*count*/) {return 0;}
|
||||
|
||||
LemmaIdType update_lemma(LemmaIdType /*lemma_id*/, int16 /*delta_count*/,
|
||||
bool /*selected*/) {return 0;}
|
||||
|
||||
LemmaIdType get_lemma_id(char16 /*lemma_str*/[], uint16 /*splids*/[],
|
||||
uint16 /*lemma_len*/) {return 0;}
|
||||
|
||||
LmaScoreType get_lemma_score(LemmaIdType /*lemma_id*/) {return 0;}
|
||||
|
||||
LmaScoreType get_lemma_score(char16 /*lemma_str*/[], uint16 /*splids*/[],
|
||||
uint16 /*lemma_len*/) {return 0;}
|
||||
|
||||
bool remove_lemma(LemmaIdType /*lemma_id*/) {return false;}
|
||||
|
||||
size_t get_total_lemma_count() {return 0;}
|
||||
void set_total_lemma_count_of_others(size_t count);
|
||||
|
||||
void flush_cache() {}
|
||||
|
||||
LemmaIdType get_lemma_id(const char16 lemma_str[], uint16 lemma_len);
|
||||
|
||||
// Fill the lemmas with highest scores to the prediction buffer.
|
||||
// his_len is the history length to fill in the prediction buffer.
|
||||
size_t predict_top_lmas(size_t his_len, NPredictItem *npre_items,
|
||||
size_t npre_max, size_t b4_used);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_DICTTRIE_H__
|
|
@ -0,0 +1,59 @@
|
|||
QT -= gui
|
||||
|
||||
TARGET = googlepinyin
|
||||
TEMPLATE = lib
|
||||
CONFIG += staticlib
|
||||
|
||||
SOURCES += \
|
||||
dictbuilder.cpp \
|
||||
dictlist.cpp \
|
||||
dicttrie.cpp \
|
||||
lpicache.cpp \
|
||||
matrixsearch.cpp \
|
||||
mystdlib.cpp \
|
||||
ngram.cpp \
|
||||
pinyinime.cpp \
|
||||
searchutility.cpp \
|
||||
spellingtable.cpp \
|
||||
spellingtrie.cpp \
|
||||
splparser.cpp \
|
||||
sync.cpp \
|
||||
userdict.cpp \
|
||||
utf16char.cpp \
|
||||
utf16reader.cpp
|
||||
|
||||
HEADERS += \
|
||||
atomdictbase.h \
|
||||
dictbuilder.h \
|
||||
dictdef.h \
|
||||
dictlist.h \
|
||||
dicttrie.h \
|
||||
lpicache.h \
|
||||
matrixsearch.h \
|
||||
mystdlib.h \
|
||||
ngram.h \
|
||||
pinyinime.h \
|
||||
searchutility.h \
|
||||
spellingtable.h \
|
||||
spellingtrie.h \
|
||||
splparser.h \
|
||||
sync.h \
|
||||
userdict.h \
|
||||
utf16char.h \
|
||||
utf16reader.h
|
||||
|
||||
win32{
|
||||
CONFIG += debug_and_release build_all
|
||||
CONFIG(debug, debug|release){
|
||||
TARGET = ../../plugin/googlepinyin/$$join(TARGET,,,d)
|
||||
}CONFIG(release, debug|release){
|
||||
TARGET = ../../plugin/googlepinyin/$$TARGET
|
||||
}
|
||||
}
|
||||
unix{
|
||||
TARGET = ../plugin/googlepinyin/$$TARGET
|
||||
MOC_DIR = ../tmpfiles
|
||||
RCC_DIR = ../tmpfiles
|
||||
UI_DIR = ../tmpfiles
|
||||
OBJECTS_DIR = ../tmpfiles
|
||||
}
|
|
@ -0,0 +1,81 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include "lpicache.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
LpiCache* LpiCache::instance_ = NULL;
|
||||
|
||||
// Allocates the item cache (kMaxLpiCachePerId slots for each of the
// kFullSplIdStart ids) and the per-id length table, and marks every id
// as having no cached items.
LpiCache::LpiCache() {
  lpi_cache_ = new LmaPsbItem[kFullSplIdStart * kMaxLpiCachePerId];
  lpi_cache_len_ = new uint16[kFullSplIdStart];
  assert(NULL != lpi_cache_);
  assert(NULL != lpi_cache_len_);

  uint16 id = 0;
  while (id < kFullSplIdStart) {
    lpi_cache_len_[id] = 0;
    id++;
  }
}
|
||||
|
||||
// Releases both cache arrays. delete[] on a null pointer has no effect,
// so no explicit NULL test is needed.
LpiCache::~LpiCache() {
  delete [] lpi_cache_;
  delete [] lpi_cache_len_;
}
|
||||
|
||||
// Returns the shared LpiCache object, creating it on the first call.
LpiCache& LpiCache::get_instance() {
  if (NULL != instance_)
    return *instance_;

  instance_ = new LpiCache();
  assert(NULL != instance_);
  return *instance_;
}
|
||||
|
||||
// Returns true if a non-empty list is cached for splid. Ids at or above
// kFullSplIdStart are never cached, so the answer for them is false.
bool LpiCache::is_cached(uint16 splid) {
  return splid < kFullSplIdStart && 0 != lpi_cache_len_[splid];
}
|
||||
|
||||
// Stores the first items of lpi_items as the cached list of splid.
//
// splid      Id to cache for. Both cache arrays hold kFullSplIdStart
//            entries, so ids at or above kFullSplIdStart are rejected.
// lpi_items  Items to copy from; must hold at least lpi_num items.
// lpi_num    Number of items available. Lists longer than
//            kMaxLpiCachePerId are truncated.
//
// Returns the number of items cached, or 0 if splid is out of range.
size_t LpiCache::put_cache(uint16 splid, LmaPsbItem lpi_items[],
                           size_t lpi_num) {
  // An id outside the cached range would index past both arrays.
  if (splid >= kFullSplIdStart)
    return 0;

  uint16 num = kMaxLpiCachePerId;
  if (num > lpi_num)
    num = static_cast<uint16>(lpi_num);

  // Each id owns a fixed window of kMaxLpiCachePerId slots.
  LmaPsbItem *lpi_cache_this = lpi_cache_ + splid * kMaxLpiCachePerId;
  for (uint16 pos = 0; pos < num; pos++)
    lpi_cache_this[pos] = lpi_items[pos];

  lpi_cache_len_[splid] = num;
  return num;
}
|
||||
|
||||
// Copies the cached list of splid into lpi_items.
//
// splid      Id to read. Both cache arrays hold kFullSplIdStart entries,
//            so ids at or above kFullSplIdStart yield nothing.
// lpi_items  Output buffer.
// lpi_max    Capacity of lpi_items.
//
// Returns the number of items copied: the smaller of lpi_max and the
// cached length, or 0 if splid is out of range.
size_t LpiCache::get_cache(uint16 splid, LmaPsbItem lpi_items[],
                           size_t lpi_max) {
  // An id outside the cached range would index past both arrays.
  if (splid >= kFullSplIdStart)
    return 0;

  if (lpi_max > lpi_cache_len_[splid])
    lpi_max = lpi_cache_len_[splid];

  // Each id owns a fixed window of kMaxLpiCachePerId slots.
  LmaPsbItem *lpi_cache_this = lpi_cache_ + splid * kMaxLpiCachePerId;
  for (uint16 pos = 0; pos < lpi_max; pos++) {
    lpi_items[pos] = lpi_cache_this[pos];
  }
  return lpi_max;
}
|
||||
|
||||
} // namespace ime_pinyin
|
|
@ -0,0 +1,62 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_ANDPY_INCLUDE_LPICACHE_H__
|
||||
#define PINYINIME_ANDPY_INCLUDE_LPICACHE_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./searchutility.h"
|
||||
#include "./spellingtrie.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Used to cache LmaPsbItem list for half spelling ids.
|
||||
class LpiCache {
|
||||
private:
|
||||
static LpiCache *instance_;
|
||||
static const int kMaxLpiCachePerId = 15;
|
||||
|
||||
LmaPsbItem *lpi_cache_;
|
||||
uint16 *lpi_cache_len_;
|
||||
|
||||
public:
|
||||
LpiCache();
|
||||
~LpiCache();
|
||||
|
||||
static LpiCache& get_instance();
|
||||
|
||||
// Test if the LPI list of the given splid has been cached.
|
||||
// If splid is a full spelling id, it returns false, because we only cache
|
||||
// list for half ids.
|
||||
bool is_cached(uint16 splid);
|
||||
|
||||
// Put LPI list to cahce. If the length of the list, lpi_num, is longer than
|
||||
// the cache buffer. the list will be truncated, and function returns the
|
||||
// maximum length of the cache buffer.
|
||||
// Note: splid must be a half id, and lpi_items must be not NULL. The
|
||||
// caller of this function should guarantee this.
|
||||
size_t put_cache(uint16 splid, LmaPsbItem lpi_items[], size_t lpi_num);
|
||||
|
||||
// Get the cached list for the given half id.
|
||||
// Return the length of the cached buffer.
|
||||
// Note: splid must be a half id, and lpi_items must be not NULL. The
|
||||
// caller of this function should guarantee this.
|
||||
size_t get_cache(uint16 splid, LmaPsbItem lpi_items[], size_t lpi_max);
|
||||
};
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // PINYINIME_ANDPY_INCLUDE_LPICACHE_H__
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,460 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_ANDPY_INCLUDE_MATRIXSEARCH_H__
|
||||
#define PINYINIME_ANDPY_INCLUDE_MATRIXSEARCH_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./atomdictbase.h"
|
||||
#include "./dicttrie.h"
|
||||
#include "./searchutility.h"
|
||||
#include "./spellingtrie.h"
|
||||
#include "./splparser.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
static const size_t kMaxRowNum = kMaxSearchSteps;
|
||||
|
||||
typedef struct {
|
||||
// MileStoneHandle objects for the system and user dictionaries.
|
||||
MileStoneHandle dict_handles[2];
|
||||
// From which DMI node. -1 means it's from root.
|
||||
PoolPosType dmi_fr;
|
||||
// The spelling id for the Pinyin string from the previous DMI to this node.
|
||||
// If it is a half id like Shengmu, the node pointed by dict_node is the first
|
||||
// node with this Shengmu,
|
||||
uint16 spl_id;
|
||||
// What's the level of the dict node. Level of root is 0, but root is never
|
||||
// recorded by dict_node.
|
||||
unsigned char dict_level:7;
|
||||
// If this node is for composing phrase, this bit is 1.
|
||||
unsigned char c_phrase:1;
|
||||
// Whether the spl_id is parsed with a split character at the end.
|
||||
unsigned char splid_end_split:1;
|
||||
// What's the length of the spelling string for this match, for the whole
|
||||
// word.
|
||||
unsigned char splstr_len:7;
|
||||
// Used to indicate whether all spelling ids from the root are full spelling
|
||||
// ids. This information is useful for keymapping mode(not finished). Because
|
||||
// in this mode, there is no clear boundaries, we prefer those results which
|
||||
// have full spelling ids.
|
||||
unsigned char all_full_id:1;
|
||||
} DictMatchInfo, *PDictMatchInfo;
|
||||
|
||||
typedef struct MatrixNode {
|
||||
LemmaIdType id;
|
||||
float score;
|
||||
MatrixNode *from;
|
||||
// From which DMI node. Used to trace the spelling segmentation.
|
||||
PoolPosType dmi_fr;
|
||||
uint16 step;
|
||||
} MatrixNode, *PMatrixNode;
|
||||
|
||||
typedef struct {
|
||||
// The MatrixNode position in the matrix pool
|
||||
PoolPosType mtrx_nd_pos;
|
||||
// The DictMatchInfo position in the DictMatchInfo pool.
|
||||
PoolPosType dmi_pos;
|
||||
uint16 mtrx_nd_num;
|
||||
uint16 dmi_num:15;
|
||||
// Used to indicate whether there are dmi nodes in this step with full
|
||||
// spelling id. This information is used to decide whether a substring of a
|
||||
// valid Pinyin should be extended.
|
||||
//
|
||||
// Example1: shoudao
|
||||
// When the last char 'o' is added, the parser will find "dao" is a valid
|
||||
// Pinyin, and because all dmi nodes at location 'd' (including those for
|
||||
// "shoud", and those for "d") have Shengmu id only, so it is not necessary
|
||||
// to extend "ao", otherwise the result may be "shoud ao", that is not
|
||||
// reasonable.
|
||||
//
|
||||
// Example2: hengao
|
||||
// When the last 'o' is added, the parser finds "gao" is a valid Pinyin.
|
||||
// Because some dmi nodes at 'g' has Shengmu ids (hen'g and g), but some dmi
|
||||
// nodes at 'g' has full ids ('heng'), so it is necessary to extend "ao", thus
|
||||
// "heng ao" can also be the result.
|
||||
//
|
||||
// Similarly, "ganga" is expanded to "gang a".
|
||||
//
|
||||
// For Pinyin string "xian", because "xian" is a valid Pinyin, because all dmi
|
||||
// nodes at 'x' only have Shengmu ids, the parser will not try "x ian" (and it
|
||||
// is not valid either). If the parser uses break in the loop, the result
|
||||
// always be "xian"; but if the parser uses continue in the loop, "xi an" will
|
||||
// also be tried. This behaviour can be set via the function
|
||||
// set_xi_an_switch().
|
||||
uint16 dmi_has_full_id:1;
|
||||
// Points to a MatrixNode of the current step to indicate which choice the
|
||||
// user selects.
|
||||
MatrixNode *mtrx_nd_fixed;
|
||||
} MatrixRow, *PMatrixRow;
|
||||
|
||||
// When user inputs and selects candidates, the fixed lemma ids are stored in
|
||||
// lma_id_ of class MatrixSearch, and fixed_lmas_ is used to indicate how many
|
||||
// lemmas from the beginning are fixed. If user deletes Pinyin characters one
|
||||
// by one from the end, these fixed lemmas can be unlocked one by one when
|
||||
// necessary. Whenever user deletes a Chinese character and its spelling string
|
||||
// in these fixed lemmas, all fixed lemmas will be merged together into a unit
|
||||
// named ComposingPhrase with a lemma id kLemmaIdComposing, and this composing
|
||||
// phrase will be the first lemma in the sentence. Because it contains some
|
||||
// modified lemmas (by deleting a character), these merged lemmas are called
|
||||
// sub lemmas (sublma), and each of them are represented individually, so that
|
||||
// when user deletes Pinyin characters from the end, these sub lemmas can also
|
||||
// be unlocked one by one.
|
||||
typedef struct {
|
||||
uint16 spl_ids[kMaxRowNum];
|
||||
uint16 spl_start[kMaxRowNum];
|
||||
char16 chn_str[kMaxRowNum]; // Chinese string.
|
||||
uint16 sublma_start[kMaxRowNum]; // Counted in Chinese characters.
|
||||
size_t sublma_num;
|
||||
uint16 length; // Counted in Chinese characters.
|
||||
} ComposingPhrase, *TComposingPhrase;
|
||||
|
||||
// The decoder class: it searches a Pinyin string and provides the candidate
// Chinese strings. It holds the system dictionary, the user dictionary, the
// spelling parser and the search matrix together with its node pools.
class MatrixSearch {
|
||||
private:
|
||||
// If it is true, prediction list by string whose length is greater than 1
|
||||
// will be limited to a reasonable number.
|
||||
static const bool kPredictLimitGt1 = false;
|
||||
|
||||
// If it is true, the engine will prefer long history based prediction,
|
||||
// for example, when user inputs "BeiJing", we prefer "DaXue", etc., which are
|
||||
// based on the two-character history.
|
||||
static const bool kPreferLongHistoryPredict = true;
|
||||
|
||||
// If it is true, prediction will only be based on user dictionary. This flag
|
||||
// is for debug purpose.
|
||||
static const bool kOnlyUserDictPredict = false;
|
||||
|
||||
// The maximum buffer to store LmaPsbItems.
|
||||
static const size_t kMaxLmaPsbItems = 1450;
|
||||
|
||||
// How many rows for each step.
|
||||
static const size_t kMaxNodeARow = 5;
|
||||
|
||||
// The maximum length of the sentence candidates counted in Chinese
|
||||
// characters.
|
||||
static const size_t kMaxSentenceLength = 16;
|
||||
|
||||
// The size of the matrix node pool.
|
||||
static const size_t kMtrxNdPoolSize = 200;
|
||||
|
||||
// The size of the DMI node pool.
|
||||
static const size_t kDmiPoolSize = 800;
|
||||
|
||||
// Used to indicate whether this object has been initialized.
|
||||
bool inited_;
|
||||
|
||||
// Spelling trie.
|
||||
const SpellingTrie *spl_trie_;
|
||||
|
||||
// Used to indicate this switcher status: when "xian" is parsed, should
|
||||
// "xi an" also be extended. Default is false.
|
||||
// These cases include: xia, xian, xiang, zhuan, jiang..., etc. The string
|
||||
// should be valid for a FULL spelling, or a combination of two spellings,
|
||||
// first of which is a FULL id too. So even if it is true, "da" will never be
|
||||
// split into "d a", because "d" is not a full spelling id.
|
||||
bool xi_an_enabled_;
|
||||
|
||||
// System dictionary.
|
||||
DictTrie* dict_trie_;
|
||||
|
||||
// User dictionary.
|
||||
AtomDictBase* user_dict_;
|
||||
|
||||
// Spelling parser.
|
||||
SpellingParser* spl_parser_;
|
||||
|
||||
// The maximum allowed length of spelling string (such as a Pinyin string).
|
||||
size_t max_sps_len_;
|
||||
|
||||
// The maximum allowed length of a result Chinese string.
|
||||
size_t max_hzs_len_;
|
||||
|
||||
// Pinyin string. Max length: kMaxRowNum - 1
|
||||
char pys_[kMaxRowNum];
|
||||
|
||||
// The length of the string that has been decoded successfully.
|
||||
size_t pys_decoded_len_;
|
||||
|
||||
// Shared buffer for multiple purposes.
|
||||
size_t *share_buf_;
|
||||
|
||||
MatrixNode *mtrx_nd_pool_;
|
||||
PoolPosType mtrx_nd_pool_used_; // How many nodes used in the pool
|
||||
DictMatchInfo *dmi_pool_;
|
||||
PoolPosType dmi_pool_used_; // How many items used in the pool
|
||||
|
||||
MatrixRow *matrix_; // The first row is for starting
|
||||
|
||||
DictExtPara *dep_; // Parameter used to extend DMI nodes.
|
||||
|
||||
NPredictItem *npre_items_; // Used to do prediction
|
||||
size_t npre_items_len_;
|
||||
|
||||
// The starting positions and lemma ids for the full sentence candidate.
|
||||
size_t lma_id_num_;
|
||||
uint16 lma_start_[kMaxRowNum]; // Counted in spelling ids.
|
||||
LemmaIdType lma_id_[kMaxRowNum];
|
||||
size_t fixed_lmas_;
|
||||
|
||||
// If fixed_lmas_ is bigger than i, Element i is used to indicate whether
|
||||
// the i'th lemma id in lma_id_ is the first candidate for that step.
|
||||
// If all candidates are the first one for that step, the whole string can be
|
||||
// decoded by the engine automatically, so no need to add it to user
|
||||
// dictionary. (We are considering to add it to user dictionary in the
|
||||
// future).
|
||||
uint8 fixed_lmas_no1_[kMaxRowNum];
|
||||
|
||||
// Composing phrase
|
||||
ComposingPhrase c_phrase_;
|
||||
|
||||
// If dmi_c_phrase_ is true, the decoder will try to match the
|
||||
// composing phrase (And definitely it will match successfully). If it
|
||||
// is false, the decoder will try to match lemma items in dictionaries.
|
||||
bool dmi_c_phrase_;
|
||||
|
||||
// The starting positions and spelling ids for the first full sentence
|
||||
// candidate.
|
||||
size_t spl_id_num_; // Number of spelling ids
|
||||
uint16 spl_start_[kMaxRowNum]; // Starting positions
|
||||
uint16 spl_id_[kMaxRowNum]; // Spelling ids
|
||||
// Used to remember the last fixed position, counted in Hanzi.
|
||||
size_t fixed_hzs_;
|
||||
|
||||
// Lemma Items with possibility score, two purposes:
|
||||
// 1. In Viterbi decoding, this buffer is used to get all possible candidates
|
||||
// for current step;
|
||||
// 2. When the search is done, this buffer is used to get candidates from the
|
||||
// first un-fixed step and show them to the user.
|
||||
LmaPsbItem lpi_items_[kMaxLmaPsbItems];
|
||||
size_t lpi_total_;
|
||||
|
||||
// Assign the pointers with NULL. The caller makes sure that all pointers are
|
||||
// not valid before calling it. This function will only be called in the
|
||||
// constructor and free_resource().
|
||||
void reset_pointers_to_null();
|
||||
|
||||
bool alloc_resource();
|
||||
|
||||
void free_resource();
|
||||
|
||||
// Reset the search space totally.
|
||||
bool reset_search0();
|
||||
|
||||
// Reset the search space from ch_pos step. For example, if the original
|
||||
// input Pinyin is "an", reset_search(1) will reset the search space to the
|
||||
// result of "a". If the given position is out of range, return false.
|
||||
// If clear_fixed_this_step is true, and the ch_pos step is a fixed step,
|
||||
// clear its fixed status. If clear_dmi_this_step is true, clear the DMI nodes.
|
||||
// If clear_mtrx_this_step is true, clear the mtrx nodes of this step.
|
||||
// The DMI nodes will be kept.
|
||||
//
|
||||
// Note: this function should not destroy content of pys_.
|
||||
bool reset_search(size_t ch_pos, bool clear_fixed_this_step,
|
||||
bool clear_dmi_this_step, bool clear_mtrx_this_step);
|
||||
|
||||
// Delete a part of the content in pys_.
|
||||
void del_in_pys(size_t start, size_t len);
|
||||
|
||||
// Delete a spelling id and its corresponding Chinese character, and merge
|
||||
// the fixed lemmas into the composing phrase.
|
||||
// del_spl_pos indicates which spelling id needs to be deleted.
|
||||
// This function will update the lemma and spelling segmentation information.
|
||||
// The caller guarantees that fixed_lmas_ > 0 and del_spl_pos is within
|
||||
// the fixed lemmas.
|
||||
void merge_fixed_lmas(size_t del_spl_pos);
|
||||
|
||||
// Get spelling start positions and ids. The result will be stored in
|
||||
// spl_id_num_, spl_start_[], spl_id_[].
|
||||
// fixed_hzs_ will be also assigned.
|
||||
void get_spl_start_id();
|
||||
|
||||
// Get all lemma ids which match the given spelling id stream (shorter than the
|
||||
// maximum length of a word).
|
||||
// If pfullsent is not NULL, it means the full sentence candidate may be the
|
||||
// same with the coming lemma string, if so, remove that lemma.
|
||||
// The result is sorted in descending order by the frequency score.
|
||||
size_t get_lpis(const uint16* splid_str, size_t splid_str_len,
|
||||
LmaPsbItem* lma_buf, size_t max_lma_buf,
|
||||
const char16 *pfullsent, bool sort_by_psb);
|
||||
|
||||
uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max);
|
||||
|
||||
uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
|
||||
uint16 splids_max, bool arg_valid);
|
||||
|
||||
|
||||
// Extend a DMI node with a spelling id. ext_len is the length of the rows
|
||||
// to extend, actually, it is the size of the spelling string of splid.
|
||||
// return value can be 1 or 0.
|
||||
// 1 means a new DMI is filled in (dmi_pool_used_ is the next blank DMI in
|
||||
// the pool).
|
||||
// 0 means either the dmi node can not be extended with splid, or the splid
|
||||
// is a Shengmu id, which is only used to get lpi_items, or the result node
|
||||
// in DictTrie has no son, it is not necessary to keep the new DMI.
|
||||
//
|
||||
// This function modifies the content of lpi_items_ and lpi_total_.
|
||||
// lpi_items_ is used to get the LmaPsbItem list, lpi_total_ returns the size.
|
||||
// The function's returned value has no relation with the value of lpi_num.
|
||||
//
|
||||
// If dmi == NULL, this function will extend the root node of DictTrie
|
||||
//
|
||||
// This function will not change dmi_pool_used_. Please change it after
|
||||
// calling this function if necessary.
|
||||
//
|
||||
// The caller should guarantee that NULL != dep.
|
||||
size_t extend_dmi(DictExtPara *dep, DictMatchInfo *dmi_s);
|
||||
|
||||
// Extend dmi for the composing phrase.
|
||||
size_t extend_dmi_c(DictExtPara *dep, DictMatchInfo *dmi_s);
|
||||
|
||||
// Extend a MatrixNode with the given LmaPsbItem list.
|
||||
// res_row is the destination row number.
|
||||
// This function does not change mtrx_nd_pool_used_. Please change it after
|
||||
// calling this function if necessary.
|
||||
// return 0 always.
|
||||
size_t extend_mtrx_nd(MatrixNode *mtrx_nd, LmaPsbItem lpi_items[],
|
||||
size_t lpi_num, PoolPosType dmi_fr, size_t res_row);
|
||||
|
||||
|
||||
// Try to find a dmi node at step_to position, and the found dmi node should
|
||||
// match the given spelling id strings.
|
||||
PoolPosType match_dmi(size_t step_to, uint16 spl_ids[], uint16 spl_id_num);
|
||||
|
||||
bool add_char(char ch);
|
||||
bool prepare_add_char(char ch);
|
||||
|
||||
// Called after prepare_add_char, so the input char has been saved.
|
||||
bool add_char_qwerty();
|
||||
|
||||
// Prepare candidates from the last fixed hanzi position.
|
||||
void prepare_candidates();
|
||||
|
||||
// Is the character in step pos a splitter character?
|
||||
// The caller guarantees that the position is valid.
|
||||
bool is_split_at(uint16 pos);
|
||||
|
||||
void fill_dmi(DictMatchInfo *dmi, MileStoneHandle *handles,
|
||||
PoolPosType dmi_fr,
|
||||
uint16 spl_id, uint16 node_num, unsigned char dict_level,
|
||||
bool splid_end_split, unsigned char splstr_len,
|
||||
unsigned char all_full_id);
|
||||
|
||||
size_t inner_predict(const char16 fixed_scis_ids[], uint16 scis_num,
|
||||
char16 predict_buf[][kMaxPredictSize + 1],
|
||||
size_t buf_len);
|
||||
|
||||
// Add the first candidate to the user dictionary.
|
||||
bool try_add_cand0_to_userdict();
|
||||
|
||||
// Add a user lemma to the user dictionary. This lemma is a subset of
|
||||
// candidate 0. lma_from is from which lemma in lma_id_, lma_num is the
|
||||
// number of lemmas to be combined together as a new lemma. The caller
|
||||
// guarantees that the combined new lemma's length is less than or equal to
|
||||
// kMaxLemmaSize.
|
||||
bool add_lma_to_userdict(uint16 lma_from, uint16 lma_num, float score);
|
||||
|
||||
// Update dictionary frequencies.
|
||||
void update_dict_freq();
|
||||
|
||||
void debug_print_dmi(PoolPosType dmi_pos, uint16 nest_level);
|
||||
|
||||
public:
|
||||
MatrixSearch();
|
||||
~MatrixSearch();
|
||||
|
||||
bool init(const char *fn_sys_dict, const char *fn_usr_dict);
|
||||
|
||||
bool init_fd(int sys_fd, long start_offset, long length,
|
||||
const char *fn_usr_dict);
|
||||
|
||||
void init_user_dictionary(const char *fn_usr_dict);
|
||||
|
||||
bool is_user_dictionary_enabled() const;
|
||||
|
||||
void set_max_lens(size_t max_sps_len, size_t max_hzs_len);
|
||||
|
||||
void close();
|
||||
|
||||
void flush_cache();
|
||||
|
||||
void set_xi_an_switch(bool xi_an_enabled);
|
||||
|
||||
bool get_xi_an_switch();
|
||||
|
||||
// Reset the search space. Equivalent to reset_search(0).
|
||||
// If inited, always return true;
|
||||
bool reset_search();
|
||||
|
||||
// Search a Pinyin string.
|
||||
// Return value is the position successfully parsed.
|
||||
size_t search(const char *py, size_t py_len);
|
||||
|
||||
// Used to delete something in the Pinyin string kept by the engine, and do
|
||||
// a re-search.
|
||||
// Return value is the new length of Pinyin string kept by the engine which
|
||||
// is parsed successfully.
|
||||
// If is_pos_in_splid is false, pos is used to indicate that pos-th Pinyin
|
||||
// character needs to be deleted. If is_pos_in_splid is true, all Pinyin
|
||||
// characters for pos-th spelling id needs to be deleted.
|
||||
// If the deleted character(s) is just after a fixed lemma or sub lemma in
|
||||
// composing phrase, clear_fixed_this_step indicates whether we needs to
|
||||
// unlock the last fixed lemma or sub lemma.
|
||||
// If is_pos_in_splid is false, and pos-th character is in the range for the
|
||||
// fixed lemmas or composing string, this function will do nothing and just
|
||||
// return the result of the previous search.
|
||||
size_t delsearch(size_t pos, bool is_pos_in_splid,
|
||||
bool clear_fixed_this_step);
|
||||
|
||||
// Get the number of candidates, called after search().
|
||||
size_t get_candidate_num();
|
||||
|
||||
// Get the Pinyin string stored by the engine.
|
||||
// *decoded_len returns the length of the successfully decoded string.
|
||||
const char* get_pystr(size_t *decoded_len);
|
||||
|
||||
// Get the spelling boundaries for the first sentence candidate.
|
||||
// Number of spellings will be returned. The number of valid elements in
|
||||
// spl_start is one more than the return value because the last one is used
|
||||
// to indicate the beginning of the next un-input spelling.
|
||||
// For a Pinyin "women", the returned value is 2, spl_start is [0, 2, 5] .
|
||||
size_t get_spl_start(const uint16 *&spl_start);
|
||||
|
||||
// Get one candidate string. If full sentence candidate is available, it will
|
||||
// be the first one.
|
||||
char16* get_candidate(size_t cand_id, char16 *cand_str, size_t max_len);
|
||||
|
||||
// Get the first candidate, which is a "full sentence".
|
||||
// If retstr_len is not NULL, it will be used to return the string length.
|
||||
// If only_unfixed is true, only unfixed part will be fetched.
|
||||
char16* get_candidate0(char16* cand_str, size_t max_len,
|
||||
uint16 *retstr_len, bool only_unfixed);
|
||||
|
||||
// Choose a candidate. The decoder will do a search after the fixed position.
|
||||
size_t choose(size_t cand_id);
|
||||
|
||||
// Cancel the last choosing operation, and return the new number of choices.
|
||||
size_t cancel_last_choice();
|
||||
|
||||
// Get the length of fixed Hanzis.
|
||||
size_t get_fixedlen();
|
||||
|
||||
size_t get_predicts(const char16 fixed_buf[],
|
||||
char16 predict_buf[][kMaxPredictSize + 1],
|
||||
size_t buf_len);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // PINYINIME_ANDPY_INCLUDE_MATRIXSEARCH_H__
|
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// For debug purpose. You can add a fixed version of qsort and bsearch functions
|
||||
// here so that the output will be totally the same under different platforms.
|
||||
|
||||
// Sorts, in place, the n elements of es bytes each that start at p, using cmp
// to order them. This is a pass-through to the C library qsort(); it exists as
// the single place where a fixed sort implementation could be substituted.
void myqsort(void *p, size_t n, size_t es,
             int (*cmp)(const void *, const void *)) {
  ::qsort(p, n, es, cmp);
}
|
||||
|
||||
// Binary-searches the sorted array b (n elements of es bytes each) for an
// element that cmp reports equal to *k. Returns whatever the C library
// bsearch() returns: a pointer to a matching element, or NULL if none matches.
void *mybsearch(const void *k, const void *b,
                size_t n, size_t es,
                int (*cmp)(const void *, const void *)) {
  void *const hit = ::bsearch(k, b, n, es, cmp);
  return hit;
}
|
||||
} // namespace ime_pinyin
|
|
@ -0,0 +1,32 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_MYSTDLIB_H__
|
||||
#define PINYINIME_INCLUDE_MYSTDLIB_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Sorts the n elements (es bytes each) starting at p in place, ordered by cmp.
// Same calling convention as the C library qsort().
void myqsort(void *p, size_t n, size_t es,
|
||||
int (*cmp)(const void *, const void *));
|
||||
|
||||
// Searches the sorted array base (nmemb elements of size bytes each) for an
// element matching *key according to compar. Same calling convention as the
// C library bsearch().
void *mybsearch(const void *key, const void *base,
|
||||
size_t nmemb, size_t size,
|
||||
int (*compar)(const void *, const void *));
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_MYSTDLIB_H__
|
|
@ -0,0 +1,342 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <time.h>
|
||||
#include "mystdlib.h"
|
||||
#include "ngram.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Pseudo-count that build_unigram() uses as the frequency of entry 0.
#define ADD_COUNT 0.3
|
||||
|
||||
// qsort()-style comparator for doubles in ascending order: returns -1 when
// *p1 is smaller than *p2, 1 when it is larger, and 0 otherwise.
int comp_double(const void *p1, const void *p2) {
  const double lhs = *static_cast<const double*>(p1);
  const double rhs = *static_cast<const double*>(p2);

  if (lhs < rhs)
    return -1;
  return (lhs > rhs) ? 1 : 0;
}
|
||||
|
||||
// Distortion between a frequency and a codebook value: the absolute
// difference of their logarithms, weighted by the frequency itself.
// (A plain fabs(freq - code) is the unweighted alternative.)
inline double distance(double freq, double code) {
  const double log_gap = log(freq) - log(code);
  return freq * fabs(log_gap);
}
|
||||
|
||||
// Find the index of the code value which is nearest to the given freq
|
||||
int qsearch_nearest(double code_book[], double freq, int start, int end) {
|
||||
if (start == end)
|
||||
return start;
|
||||
|
||||
if (start + 1 == end) {
|
||||
if (distance(freq, code_book[end]) > distance(freq, code_book[start]))
|
||||
return start;
|
||||
return end;
|
||||
}
|
||||
|
||||
int mid = (start + end) / 2;
|
||||
|
||||
if (code_book[mid] > freq)
|
||||
return qsearch_nearest(code_book, freq, start, mid);
|
||||
else
|
||||
return qsearch_nearest(code_book, freq, mid, end);
|
||||
}
|
||||
|
||||
size_t update_code_idx(double freqs[], size_t num, double code_book[],
|
||||
CODEBOOK_TYPE *code_idx) {
|
||||
size_t changed = 0;
|
||||
for (size_t pos = 0; pos < num; pos++) {
|
||||
CODEBOOK_TYPE idx;
|
||||
idx = qsearch_nearest(code_book, freqs[pos], 0, kCodeBookSize - 1);
|
||||
if (idx != code_idx[pos])
|
||||
changed++;
|
||||
code_idx[pos] = idx;
|
||||
}
|
||||
return changed;
|
||||
}
|
||||
|
||||
double recalculate_kernel(double freqs[], size_t num, double code_book[],
|
||||
CODEBOOK_TYPE *code_idx) {
|
||||
double ret = 0;
|
||||
|
||||
size_t *item_num = new size_t[kCodeBookSize];
|
||||
assert(item_num);
|
||||
memset(item_num, 0, sizeof(size_t) * kCodeBookSize);
|
||||
|
||||
double *cb_new = new double[kCodeBookSize];
|
||||
assert(cb_new);
|
||||
memset(cb_new, 0, sizeof(double) * kCodeBookSize);
|
||||
|
||||
for (size_t pos = 0; pos < num; pos++) {
|
||||
ret += distance(freqs[pos], code_book[code_idx[pos]]);
|
||||
|
||||
cb_new[code_idx[pos]] += freqs[pos];
|
||||
item_num[code_idx[pos]] += 1;
|
||||
}
|
||||
|
||||
for (size_t code = 0; code < kCodeBookSize; code++) {
|
||||
assert(item_num[code] > 0);
|
||||
code_book[code] = cb_new[code] / item_num[code];
|
||||
}
|
||||
|
||||
delete [] item_num;
|
||||
delete [] cb_new;
|
||||
|
||||
return ret;
|
||||
}
|
||||
|
||||
void iterate_codes(double freqs[], size_t num, double code_book[],
|
||||
CODEBOOK_TYPE *code_idx) {
|
||||
size_t iter_num = 0;
|
||||
double delta_last = 0;
|
||||
do {
|
||||
size_t changed = update_code_idx(freqs, num, code_book, code_idx);
|
||||
|
||||
double delta = recalculate_kernel(freqs, num, code_book, code_idx);
|
||||
|
||||
if (kPrintDebug0) {
|
||||
printf("---Unigram codebook iteration: %d : %d, %.9f\n",
|
||||
iter_num, changed, delta);
|
||||
}
|
||||
iter_num++;
|
||||
|
||||
if (iter_num > 1 &&
|
||||
(delta == 0 || fabs(delta_last - delta)/fabs(delta) < 0.000000001))
|
||||
break;
|
||||
delta_last = delta;
|
||||
} while (true);
|
||||
}
|
||||
|
||||
|
||||
// The single NGram object; created on the first call to get_instance().
NGram* NGram::instance_ = NULL;
|
||||
|
||||
// Creates an empty, uninitialized model: no buffers are allocated and every
// counter is zero. load_ngram() (or build_unigram() in model-building
// builds) fills it in.
NGram::NGram() {
  initialized_ = false;
  idx_num_ = 0;
  // Previously left indeterminate until load_ngram() succeeded.
  total_freq_none_sys_ = 0;
  lma_freq_idx_ = NULL;
  sys_score_compensation_ = 0;

#ifdef ___BUILD_MODEL___
  freq_codes_df_ = NULL;
#endif
  freq_codes_ = NULL;
}
|
||||
|
||||
// Releases the model buffers. free() accepts NULL, so buffers that were never
// allocated need no special handling.
NGram::~NGram() {
  free(lma_freq_idx_);

#ifdef ___BUILD_MODEL___
  free(freq_codes_df_);
#endif

  free(freq_codes_);
}
|
||||
|
||||
// Returns the shared NGram object, creating it on first use. The object is
// never deleted here.
NGram& NGram::get_instance() {
  if (NULL != instance_)
    return *instance_;

  instance_ = new NGram();
  return *instance_;
}
|
||||
|
||||
bool NGram::save_ngram(FILE *fp) {
|
||||
if (!initialized_ || NULL == fp)
|
||||
return false;
|
||||
|
||||
if (0 == idx_num_ || NULL == freq_codes_ || NULL == lma_freq_idx_)
|
||||
return false;
|
||||
|
||||
if (fwrite(&idx_num_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(freq_codes_, sizeof(LmaScoreType), kCodeBookSize, fp) !=
|
||||
kCodeBookSize)
|
||||
return false;
|
||||
|
||||
if (fwrite(lma_freq_idx_, sizeof(CODEBOOK_TYPE), idx_num_, fp) != idx_num_)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool NGram::load_ngram(FILE *fp) {
|
||||
if (NULL == fp)
|
||||
return false;
|
||||
|
||||
initialized_ = false;
|
||||
|
||||
if (fread(&idx_num_, sizeof(uint32), 1, fp) != 1 )
|
||||
return false;
|
||||
|
||||
if (NULL != lma_freq_idx_)
|
||||
free(lma_freq_idx_);
|
||||
|
||||
if (NULL != freq_codes_)
|
||||
free(freq_codes_);
|
||||
|
||||
lma_freq_idx_ = static_cast<CODEBOOK_TYPE*>
|
||||
(malloc(idx_num_ * sizeof(CODEBOOK_TYPE)));
|
||||
freq_codes_ = static_cast<LmaScoreType*>
|
||||
(malloc(kCodeBookSize * sizeof(LmaScoreType)));
|
||||
|
||||
if (NULL == lma_freq_idx_ || NULL == freq_codes_)
|
||||
return false;
|
||||
|
||||
if (fread(freq_codes_, sizeof(LmaScoreType), kCodeBookSize, fp) !=
|
||||
kCodeBookSize)
|
||||
return false;
|
||||
|
||||
if (fread(lma_freq_idx_, sizeof(CODEBOOK_TYPE), idx_num_, fp) != idx_num_)
|
||||
return false;
|
||||
|
||||
initialized_ = true;
|
||||
|
||||
total_freq_none_sys_ = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
void NGram::set_total_freq_none_sys(size_t freq_none_sys) {
|
||||
total_freq_none_sys_ = freq_none_sys;
|
||||
if (0 == total_freq_none_sys_) {
|
||||
sys_score_compensation_ = 0;
|
||||
} else {
|
||||
double factor = static_cast<double>(kSysDictTotalFreq) / (
|
||||
kSysDictTotalFreq + total_freq_none_sys_);
|
||||
sys_score_compensation_ = static_cast<float>(
|
||||
log(factor) * kLogValueAmplifier);
|
||||
}
|
||||
}
|
||||
|
||||
// The caller makes sure this oject is initialized.
|
||||
float NGram::get_uni_psb(LemmaIdType lma_id) {
|
||||
return static_cast<float>(freq_codes_[lma_freq_idx_[lma_id]]) +
|
||||
sys_score_compensation_;
|
||||
}
|
||||
|
||||
float NGram::convert_psb_to_score(double psb) {
|
||||
float score = static_cast<float>(
|
||||
log(psb) * static_cast<double>(kLogValueAmplifier));
|
||||
if (score > static_cast<float>(kMaxScore)) {
|
||||
score = static_cast<float>(kMaxScore);
|
||||
}
|
||||
return score;
|
||||
}
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
// Builds the unigram model from lemma_arr (model-building builds only).
//
// lemma_arr / lemma_num: the lemma entries. Their idx_by_hz values are
//   expected to start at 1 and to grow by exactly one whenever they change
//   (asserted); entry 0 is reserved and gets the pseudo-count ADD_COUNT.
// next_idx_unused: one past the largest idx_by_hz; it sizes the frequency
//   table and must be greater than 1.
//
// The frequencies are normalized to probabilities, quantized into a codebook
// of kCodeBookSize values (refined by iterate_codes()), and the codebook is
// finally converted to scores with convert_psb_to_score().
//
// Returns false on invalid arguments, on allocation failure, when the lemma
// ids do not fit next_idx_unused, or when the input has fewer than
// kCodeBookSize distinct frequencies. The object is left uninitialized in
// every failure case that occurs after argument validation.
bool NGram::build_unigram(LemmaEntry *lemma_arr, size_t lemma_num,
                          LemmaIdType next_idx_unused) {
  if (NULL == lemma_arr || 0 == lemma_num || next_idx_unused <= 1)
    return false;

  // The model is being rebuilt; it is unusable until the build completes.
  initialized_ = false;

  double total_freq = 0;
  double *freqs = new double[next_idx_unused];

  freqs[0] = ADD_COUNT;
  total_freq += freqs[0];
  LemmaIdType idx_now = 0;
  for (size_t pos = 0; pos < lemma_num; pos++) {
    if (lemma_arr[pos].idx_by_hz == idx_now)
      continue;

    // Never write past the end of freqs, even when asserts are compiled out.
    if (idx_now + 1 >= next_idx_unused) {
      delete [] freqs;
      return false;
    }
    idx_now++;

    assert(lemma_arr[pos].idx_by_hz == idx_now);

    freqs[idx_now] = lemma_arr[pos].freq;
    if (freqs[idx_now] <= 0)
      freqs[idx_now] = ADD_COUNT;

    total_freq += freqs[idx_now];
  }

  idx_num_ = idx_now + 1;
  assert(idx_now + 1 == next_idx_unused);

  // Normalize the raw frequencies to probabilities.
  for (size_t pos = 0; pos < idx_num_; pos++) {
    freqs[pos] = freqs[pos] / total_freq;
    assert(freqs[pos] > 0);
  }

  // The member buffers are released with free() (see ~NGram() and
  // load_ngram()), so they must come from malloc(), not new[].
  if (NULL == freq_codes_df_)
    freq_codes_df_ = static_cast<double*>(
        malloc(kCodeBookSize * sizeof(double)));
  if (NULL == freq_codes_)
    freq_codes_ = static_cast<LmaScoreType*>(
        malloc(kCodeBookSize * sizeof(LmaScoreType)));
  if (NULL == freq_codes_df_ || NULL == freq_codes_) {
    delete [] freqs;
    return false;
  }
  memset(freq_codes_df_, 0, sizeof(double) * kCodeBookSize);
  memset(freq_codes_, 0, sizeof(LmaScoreType) * kCodeBookSize);

  // Seed the codebook with the first kCodeBookSize distinct probabilities.
  size_t freq_pos = 0;
  for (size_t code_pos = 0; code_pos < kCodeBookSize; code_pos++) {
    bool found = true;

    while (found) {
      found = false;

      // Fewer distinct values than codes: the codebook cannot be seeded.
      if (freq_pos >= idx_num_) {
        delete [] freqs;
        return false;
      }

      double cand = freqs[freq_pos];
      for (size_t i = 0; i < code_pos; i++)
        if (freq_codes_df_[i] == cand) {
          found = true;
          break;
        }
      if (found)
        freq_pos++;
    }

    freq_codes_df_[code_pos] = freqs[freq_pos];
    freq_pos++;
  }

  myqsort(freq_codes_df_, kCodeBookSize, sizeof(double), comp_double);

  // A buffer left over from an earlier load/build may be sized for a
  // different idx_num_, so always allocate a fresh one of the right size.
  free(lma_freq_idx_);
  lma_freq_idx_ = static_cast<CODEBOOK_TYPE*>(
      malloc(idx_num_ * sizeof(CODEBOOK_TYPE)));
  if (NULL == lma_freq_idx_) {
    delete [] freqs;
    return false;
  }
  // iterate_codes() compares against the previous assignment; start from a
  // defined state.
  memset(lma_freq_idx_, 0, idx_num_ * sizeof(CODEBOOK_TYPE));

  iterate_codes(freqs, idx_num_, freq_codes_df_, lma_freq_idx_);

  delete [] freqs;

  if (kPrintDebug0) {
    printf("\n------Language Model Unigram Codebook------\n");
  }

  for (size_t code_pos = 0; code_pos < kCodeBookSize; code_pos++) {
    double log_score = log(freq_codes_df_[code_pos]);
    float final_score = convert_psb_to_score(freq_codes_df_[code_pos]);
    if (kPrintDebug0) {
      // code_pos is a size_t; convert it so the format matches the argument.
      printf("code:%lu, probability:%.9f, log score:%.3f, final score: %.3f\n",
             static_cast<unsigned long>(code_pos), freq_codes_df_[code_pos],
             log_score, final_score);
    }
    freq_codes_[code_pos] = static_cast<LmaScoreType>(final_score);
  }

  initialized_ = true;
  return true;
}
|
||||
#endif
|
||||
|
||||
} // namespace ime_pinyin
|
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_NGRAM_H__
|
||||
#define PINYINIME_INCLUDE_NGRAM_H__
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "./dictdef.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Type of an index into the score codebook (one per lemma).
typedef unsigned char CODEBOOK_TYPE;
|
||||
|
||||
// Number of entries in the score codebook.
static const size_t kCodeBookSize = 256;
|
||||
|
||||
// Unigram language model. Lemma scores are stored as a codebook of
// kCodeBookSize values (freq_codes_) plus one codebook index per lemma
// (lma_freq_idx_). A shared object is available through get_instance().
class NGram {
|
||||
public:
|
||||
// The maximum score of a lemma item.
|
||||
static const LmaScoreType kMaxScore = 0x3fff;
|
||||
|
||||
// In order to reduce the storage size, the original log value is amplified by
|
||||
// kLogValueAmplifier, and we use LmaScoreType to store.
|
||||
// After this process, an item with a lower score has a higher frequency.
|
||||
static const int kLogValueAmplifier = -800;
|
||||
|
||||
// System words' total frequency. It is not the real total frequency, instead,
|
||||
// it is only used to adjust system lemmas' scores when the user dictionary's
|
||||
// total frequency changes.
|
||||
// In this version, frequencies of system lemmas are fixed. We are considering
|
||||
// to make them changeable in next version.
|
||||
static const size_t kSysDictTotalFreq = 100000000;
|
||||
|
||||
private:
|
||||
|
||||
static NGram* instance_;
|
||||
|
||||
bool initialized_;
|
||||
uint32 idx_num_;
|
||||
|
||||
size_t total_freq_none_sys_;
|
||||
|
||||
// Score compensation for system dictionary lemmas.
|
||||
// Because after user adds some user lemmas, the total frequency changes, and
|
||||
// we use this value to normalize the score.
|
||||
float sys_score_compensation_;
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
double *freq_codes_df_;
|
||||
#endif
|
||||
LmaScoreType *freq_codes_;
|
||||
CODEBOOK_TYPE *lma_freq_idx_;
|
||||
|
||||
public:
|
||||
NGram();
|
||||
~NGram();
|
||||
|
||||
static NGram& get_instance();
|
||||
|
||||
bool save_ngram(FILE *fp);
|
||||
bool load_ngram(FILE *fp);
|
||||
|
||||
// Set the total frequency of all non-system dictionaries.
|
||||
void set_total_freq_none_sys(size_t freq_none_sys);
|
||||
|
||||
float get_uni_psb(LemmaIdType lma_id);
|
||||
|
||||
// Convert a probability to score. Actually, the score will be limited to
|
||||
// kMaxScore, but at runtime, we also need float expression to get accurate
|
||||
// value of the score.
|
||||
// After the conversion, a lower score indicates a higher probability of the
|
||||
// item.
|
||||
static float convert_psb_to_score(double psb);
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
// For constructing the unigram model.
|
||||
bool build_unigram(LemmaEntry *lemma_arr, size_t num,
|
||||
LemmaIdType next_idx_unused);
|
||||
#endif
|
||||
};
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_NGRAM_H__
|
|
@ -0,0 +1,197 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "pinyinime.h"
|
||||
#include "dicttrie.h"
|
||||
#include "matrixsearch.h"
|
||||
#include "spellingtrie.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
using namespace ime_pinyin;
|
||||
|
||||
// The maximum number of the prediction items.
|
||||
static const size_t kMaxPredictNum = 500;
|
||||
|
||||
// Used to search Pinyin string and give the best candidate.
|
||||
MatrixSearch* matrix_search = NULL;
|
||||
|
||||
char16 predict_buf[kMaxPredictNum][kMaxPredictSize + 1];
|
||||
|
||||
// Replaces the global decoder with a freshly constructed MatrixSearch and
// initializes it from the given system and user dictionary file names.
// Returns the result of MatrixSearch::init().
bool im_open_decoder(const char *fn_sys_dict, const char *fn_usr_dict) {
  // Deleting a NULL pointer is a no-op, so no guard is required here.
  delete matrix_search;

  matrix_search = new MatrixSearch();
  return (NULL != matrix_search) &&
         matrix_search->init(fn_sys_dict, fn_usr_dict);
}
|
||||
|
||||
// Same as im_open_decoder(), but the system dictionary is read from the
// region [start_offset, start_offset + length) of the file descriptor sys_fd.
// Returns the result of MatrixSearch::init_fd().
bool im_open_decoder_fd(int sys_fd, long start_offset, long length,
                        const char *fn_usr_dict) {
  // Deleting a NULL pointer is a no-op, so no guard is required here.
  delete matrix_search;

  matrix_search = new MatrixSearch();
  return (NULL != matrix_search) &&
         matrix_search->init_fd(sys_fd, start_offset, length, fn_usr_dict);
}
|
||||
|
||||
void im_close_decoder() {
|
||||
if (NULL != matrix_search) {
|
||||
matrix_search->close();
|
||||
delete matrix_search;
|
||||
}
|
||||
matrix_search = NULL;
|
||||
}
|
||||
|
||||
void im_set_max_lens(size_t max_sps_len, size_t max_hzs_len) {
|
||||
if (NULL != matrix_search) {
|
||||
matrix_search->set_max_lens(max_sps_len, max_hzs_len);
|
||||
}
|
||||
}
|
||||
|
||||
void im_flush_cache() {
|
||||
if (NULL != matrix_search)
|
||||
matrix_search->flush_cache();
|
||||
}
|
||||
|
||||
// To be updated.
|
||||
// Runs a search on the first pylen characters of pybuf and returns the
// resulting number of candidates; returns 0 when no decoder is open.
size_t im_search(const char* pybuf, size_t pylen) {
  if (NULL != matrix_search) {
    matrix_search->search(pybuf, pylen);
    return matrix_search->get_candidate_num();
  }
  return 0;
}
|
||||
|
||||
// Performs a delete operation at pos in the current search and returns the
// resulting number of candidates; returns 0 when no decoder is open.
size_t im_delsearch(size_t pos, bool is_pos_in_splid,
                    bool clear_fixed_this_step) {
  if (NULL != matrix_search) {
    matrix_search->delsearch(pos, is_pos_in_splid, clear_fixed_this_step);
    return matrix_search->get_candidate_num();
  }
  return 0;
}
|
||||
|
||||
// Resets the decoder's current search state, if a decoder is open.
void im_reset_search() {
  if (NULL != matrix_search) {
    matrix_search->reset_search();
  }
}
|
||||
|
||||
// To be removed
|
||||
// Stub kept for interface compatibility: the letter is ignored and the
// reported candidate count is always 0.
size_t im_add_letter(char ch) {
  (void)ch;  // Intentionally unused.
  const size_t kNoCandidates = 0;
  return kNoCandidates;
}
|
||||
|
||||
const char* im_get_sps_str(size_t *decoded_len) {
|
||||
if (NULL == matrix_search)
|
||||
return NULL;
|
||||
|
||||
return matrix_search->get_pystr(decoded_len);
|
||||
}
|
||||
|
||||
// Fetches candidate cand_id into cand_str (capacity max_len) through the
// decoder; returns NULL when no decoder is open.
char16* im_get_candidate(size_t cand_id, char16* cand_str,
                         size_t max_len) {
  if (NULL != matrix_search) {
    return matrix_search->get_candidate(cand_id, cand_str, max_len);
  }
  return NULL;
}
|
||||
|
||||
size_t im_get_spl_start_pos(const uint16 *&spl_start) {
|
||||
if (NULL == matrix_search)
|
||||
return 0;
|
||||
|
||||
return matrix_search->get_spl_start(spl_start);
|
||||
}
|
||||
|
||||
size_t im_choose(size_t choice_id) {
|
||||
if (NULL == matrix_search)
|
||||
return 0;
|
||||
|
||||
return matrix_search->choose(choice_id);
|
||||
}
|
||||
|
||||
size_t im_cancel_last_choice() {
|
||||
if (NULL == matrix_search)
|
||||
return 0;
|
||||
|
||||
return matrix_search->cancel_last_choice();
|
||||
}
|
||||
|
||||
size_t im_get_fixed_len() {
|
||||
if (NULL == matrix_search)
|
||||
return 0;
|
||||
|
||||
return matrix_search->get_fixedlen();
|
||||
}
|
||||
|
||||
// To be removed
|
||||
// Stub kept for interface compatibility: performs no work and always
// reports success.
bool im_cancel_input() {
  const bool kAlwaysSucceeds = true;
  return kAlwaysSucceeds;
}
|
||||
|
||||
|
||||
size_t im_get_predicts(const char16 *his_buf,
|
||||
char16 (*&pre_buf)[kMaxPredictSize + 1]) {
|
||||
if (NULL == his_buf)
|
||||
return 0;
|
||||
|
||||
size_t fixed_len = utf16_strlen(his_buf);
|
||||
const char16 *fixed_ptr = his_buf;
|
||||
if (fixed_len > kMaxPredictSize) {
|
||||
fixed_ptr += fixed_len - kMaxPredictSize;
|
||||
fixed_len = kMaxPredictSize;
|
||||
}
|
||||
|
||||
pre_buf = predict_buf;
|
||||
return matrix_search->get_predicts(his_buf, pre_buf, kMaxPredictNum);
|
||||
}
|
||||
|
||||
void im_enable_shm_as_szm(bool enable) {
|
||||
SpellingTrie &spl_trie = SpellingTrie::get_instance();
|
||||
spl_trie.szm_enable_shm(enable);
|
||||
}
|
||||
|
||||
void im_enable_ym_as_szm(bool enable) {
|
||||
SpellingTrie &spl_trie = SpellingTrie::get_instance();
|
||||
spl_trie.szm_enable_ym(enable);
|
||||
}
|
||||
|
||||
// Flushes the decoder's cache and then (re)initializes its user dictionary
// from fn_usr_dict. Does nothing when no decoder is open.
void im_init_user_dictionary(const char *fn_usr_dict) {
  if (matrix_search) {
    matrix_search->flush_cache();
    matrix_search->init_user_dictionary(fn_usr_dict);
  }
}
|
||||
|
||||
bool im_is_user_dictionary_enabled(void) {
|
||||
return NULL != matrix_search ? matrix_search->is_user_dictionary_enabled() : false;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,223 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_ANDPYIME_H__
|
||||
#define PINYINIME_INCLUDE_ANDPYIME_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./dictdef.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
/**
|
||||
* Open the decoder engine via the system and user dictionary file names.
|
||||
*
|
||||
* @param fn_sys_dict The file name of the system dictionary.
|
||||
* @param fn_usr_dict The file name of the user dictionary.
|
||||
* @return true if open the decoder engine successfully.
|
||||
*/
|
||||
bool im_open_decoder(const char *fn_sys_dict, const char *fn_usr_dict);
|
||||
|
||||
/**
|
||||
* Open the decoder engine via the system dictionary FD and user dictionary
|
||||
* file name. Because on Android, the system dictionary is embedded in the
|
||||
* whole application apk file.
|
||||
*
|
||||
* @param sys_fd The file in which the system dictionary is embedded.
|
||||
* @param start_offset The starting position of the system dictionary in the
|
||||
* file sys_fd.
|
||||
* @param length The length of the system dictionary in the file sys_fd,
|
||||
* counted in byte.
|
||||
* @return true if succeed.
|
||||
*/
|
||||
bool im_open_decoder_fd(int sys_fd, long start_offset, long length,
|
||||
const char *fn_usr_dict);
|
||||
|
||||
/**
|
||||
* Close the decoder engine.
|
||||
*/
|
||||
void im_close_decoder();
|
||||
|
||||
/**
|
||||
* Set maximum limitations for decoding. If this function is not called,
|
||||
* default values will be used. For example, due to screen size limitation,
|
||||
* the UI engine of the IME can only show a certain number of letters(input)
|
||||
* to decode, and a certain number of Chinese characters(output). If after
|
||||
* user adds a new letter, the input or the output string is longer than the
|
||||
* limitations, the engine will discard the recent letter.
|
||||
*
|
||||
* @param max_sps_len Maximum length of the spelling string(Pinyin string).
|
||||
* @param max_hzs_len Maximum length of the decoded Chinese character string.
|
||||
*/
|
||||
void im_set_max_lens(size_t max_sps_len, size_t max_hzs_len);
|
||||
|
||||
/**
|
||||
* Flush cached data to persistent memory. Because at runtime, in order to
|
||||
* achieve best performance, some data is only stored in memory.
|
||||
*/
|
||||
void im_flush_cache();
|
||||
|
||||
/**
|
||||
* Use a spelling string(Pinyin string) to search. The engine will try to do
|
||||
* an incremental search based on its previous search result, so if the new
|
||||
* string has the same prefix with the previous one stored in the decoder,
|
||||
* the decoder will only continue the search from the end of the prefix.
|
||||
* If the caller needs to do a brand new search, please call im_reset_search()
|
||||
* first. Calling im_search() is equivalent to calling im_add_letter() one by
|
||||
* one.
|
||||
*
|
||||
* @param sps_buf The spelling string buffer to decode.
|
||||
* @param sps_len The length of the spelling string buffer.
|
||||
* @return The number of candidates.
|
||||
*/
|
||||
size_t im_search(const char* sps_buf, size_t sps_len);
|
||||
|
||||
/**
|
||||
* Make a delete operation in the current search result, and make research if
|
||||
* necessary.
|
||||
*
|
||||
* @param pos The position of char in spelling string to delete, or the
|
||||
* position of spelling id in result string to delete.
|
||||
* @param is_pos_in_splid Indicate whether the pos parameter is the position
|
||||
* in the spelling string, or the position in the result spelling id string.
|
||||
* @return The number of candidates.
|
||||
*/
|
||||
size_t im_delsearch(size_t pos, bool is_pos_in_splid,
|
||||
bool clear_fixed_this_step);
|
||||
|
||||
/**
|
||||
* Reset the previous search result.
|
||||
*/
|
||||
void im_reset_search();
|
||||
|
||||
/**
|
||||
* Add a Pinyin letter to the current spelling string kept by decoder. If the
|
||||
* decoder fails in adding the letter, it will do nothing. im_get_sps_str()
|
||||
* can be used to get the spelling string kept by decoder currently.
|
||||
*
|
||||
* @param ch The letter to add.
|
||||
* @return The number of candidates.
|
||||
*/
|
||||
size_t im_add_letter(char ch);
|
||||
|
||||
/**
|
||||
* Get the spelling string kept by the decoder.
|
||||
*
|
||||
* @param decoded_len Used to return how many characters in the spelling
|
||||
* string is successfully parsed.
|
||||
* @return The spelling string kept by the decoder.
|
||||
*/
|
||||
const char *im_get_sps_str(size_t *decoded_len);
|
||||
|
||||
/**
|
||||
* Get a candidate(or choice) string.
|
||||
*
|
||||
* @param cand_id The id to get a candidate. Started from 0. Usually, id 0
|
||||
* is a sentence-level candidate.
|
||||
* @param cand_str The buffer to store the candidate.
|
||||
* @param max_len The maximum length of the buffer.
|
||||
* @return cand_str if succeeds, otherwise NULL.
|
||||
*/
|
||||
char16* im_get_candidate(size_t cand_id, char16* cand_str,
|
||||
size_t max_len);
|
||||
|
||||
/**
|
||||
* Get the segmentation information(the starting positions) of the spelling
|
||||
* string.
|
||||
*
|
||||
* @param spl_start Used to return the starting positions.
|
||||
* @return The number of spelling ids. If it is L, there will be L+1 valid
|
||||
* elements in spl_start, and spl_start[L] is the position after the end of
|
||||
* the last spelling id.
|
||||
*/
|
||||
size_t im_get_spl_start_pos(const uint16 *&spl_start);
|
||||
|
||||
/**
|
||||
* Choose a candidate and make it fixed. If the candidate does not match
|
||||
* the end of all spelling ids, new candidates will be provided from the
|
||||
* first unfixed position. If the candidate matches the end of the all
|
||||
* spelling ids, there will be only one new candidate, or the whole fixed
|
||||
* sentence.
|
||||
*
|
||||
* @param cand_id The id of candidate to select and make it fixed.
|
||||
* @return The number of candidates. If after the selection, the whole result
|
||||
* string has been fixed, there will be only one candidate.
|
||||
*/
|
||||
size_t im_choose(size_t cand_id);
|
||||
|
||||
/**
|
||||
* Cancel the last selection, or revert the last operation of im_choose().
|
||||
*
|
||||
* @return The number of candidates.
|
||||
*/
|
||||
size_t im_cancel_last_choice();
|
||||
|
||||
/**
|
||||
* Get the number of fixed spelling ids, or Chinese characters.
|
||||
*
|
||||
* @return The number of fixed spelling ids, or Chinese characters.
|
||||
*/
|
||||
size_t im_get_fixed_len();
|
||||
|
||||
/**
|
||||
* Cancel the input state and reset the search workspace.
|
||||
*/
|
||||
bool im_cancel_input();
|
||||
|
||||
/**
|
||||
* Get prediction candidates based on the given fixed Chinese string as the
|
||||
* history.
|
||||
*
|
||||
* @param his_buf The history buffer to do the prediction. It should be ended
|
||||
* with '\0'.
|
||||
* @param pre_buf Used to return prediction result list.
|
||||
* @return The number of predicted result string.
|
||||
*/
|
||||
size_t im_get_predicts(const char16 *his_buf,
|
||||
char16 (*&pre_buf)[kMaxPredictSize + 1]);
|
||||
|
||||
/**
|
||||
* Enable Shengmus in ShouZiMu mode.
|
||||
*/
|
||||
void im_enable_shm_as_szm(bool enable);
|
||||
|
||||
/**
|
||||
* Enable Yunmus in ShouZiMu mode.
|
||||
*/
|
||||
void im_enable_ym_as_szm(bool enable);
|
||||
|
||||
/**
|
||||
* Initializes or uninitializes the user dictionary.
|
||||
*
|
||||
* @param fn_usr_dict The file name of the user dictionary.
|
||||
*/
|
||||
void im_init_user_dictionary(const char *fn_usr_dict);
|
||||
|
||||
/**
|
||||
* Returns the current status of user dictionary.
|
||||
*/
|
||||
bool im_is_user_dictionary_enabled(void);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // PINYINIME_INCLUDE_ANDPYIME_H__
|
|
@ -0,0 +1,210 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include "mystdlib.h"
|
||||
#include "searchutility.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// True when lma_id lies in the range (0, kSysDictIdEnd].
bool is_system_lemma(LemmaIdType lma_id) {
  if (!(0 < lma_id))
    return false;
  return lma_id <= kSysDictIdEnd;
}
|
||||
|
||||
// True when lma_id lies in the range [kUserDictIdStart, kUserDictIdEnd].
bool is_user_lemma(LemmaIdType lma_id) {
  if (lma_id < kUserDictIdStart)
    return false;
  return lma_id <= kUserDictIdEnd;
}
|
||||
|
||||
// True when lma_id equals the reserved id kLemmaIdComposing.
bool is_composing_lemma(LemmaIdType lma_id) {
  return lma_id == kLemmaIdComposing;
}
|
||||
|
||||
int cmp_lpi_with_psb(const void *p1, const void *p2) {
|
||||
if ((static_cast<const LmaPsbItem*>(p1))->psb >
|
||||
(static_cast<const LmaPsbItem*>(p2))->psb)
|
||||
return 1;
|
||||
if ((static_cast<const LmaPsbItem*>(p1))->psb <
|
||||
(static_cast<const LmaPsbItem*>(p2))->psb)
|
||||
return -1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmp_lpi_with_unified_psb(const void *p1, const void *p2) {
|
||||
const LmaPsbItem *item1 = static_cast<const LmaPsbItem*>(p1);
|
||||
const LmaPsbItem *item2 = static_cast<const LmaPsbItem*>(p2);
|
||||
|
||||
// The real unified psb is psb1 / lma_len1 and psb2 * lma_len2
|
||||
// But we use psb1 * lma_len2 and psb2 * lma_len1 to get better
|
||||
// precision.
|
||||
size_t up1 = item1->psb * (item2->lma_len);
|
||||
size_t up2 = item2->psb * (item1->lma_len);
|
||||
if (up1 < up2) {
|
||||
return -1;
|
||||
}
|
||||
if (up1 > up2) {
|
||||
return 1;
|
||||
}
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmp_lpi_with_id(const void *p1, const void *p2) {
|
||||
if ((static_cast<const LmaPsbItem*>(p1))->id <
|
||||
(static_cast<const LmaPsbItem*>(p2))->id)
|
||||
return -1;
|
||||
if ((static_cast<const LmaPsbItem*>(p1))->id >
|
||||
(static_cast<const LmaPsbItem*>(p2))->id)
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmp_lpi_with_hanzi(const void *p1, const void *p2) {
|
||||
if ((static_cast<const LmaPsbItem*>(p1))->hanzi <
|
||||
(static_cast<const LmaPsbItem*>(p2))->hanzi)
|
||||
return -1;
|
||||
if ((static_cast<const LmaPsbItem*>(p1))->hanzi >
|
||||
(static_cast<const LmaPsbItem*>(p2))->hanzi)
|
||||
return 1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmp_lpsi_with_str(const void *p1, const void *p2) {
|
||||
return utf16_strcmp((static_cast<const LmaPsbStrItem*>(p1))->str,
|
||||
(static_cast<const LmaPsbStrItem*>(p2))->str);
|
||||
}
|
||||
|
||||
|
||||
int cmp_hanzis_1(const void *p1, const void *p2) {
|
||||
if (*static_cast<const char16*>(p1) <
|
||||
*static_cast<const char16*>(p2))
|
||||
return -1;
|
||||
|
||||
if (*static_cast<const char16*>(p1) >
|
||||
*static_cast<const char16*>(p2))
|
||||
return 1;
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmp_hanzis_2(const void *p1, const void *p2) {
|
||||
return utf16_strncmp(static_cast<const char16*>(p1),
|
||||
static_cast<const char16*>(p2), 2);
|
||||
}
|
||||
|
||||
int cmp_hanzis_3(const void *p1, const void *p2) {
|
||||
return utf16_strncmp(static_cast<const char16*>(p1),
|
||||
static_cast<const char16*>(p2), 3);
|
||||
}
|
||||
|
||||
int cmp_hanzis_4(const void *p1, const void *p2) {
|
||||
return utf16_strncmp(static_cast<const char16*>(p1),
|
||||
static_cast<const char16*>(p2), 4);
|
||||
}
|
||||
|
||||
int cmp_hanzis_5(const void *p1, const void *p2) {
|
||||
return utf16_strncmp(static_cast<const char16*>(p1),
|
||||
static_cast<const char16*>(p2), 5);
|
||||
}
|
||||
|
||||
int cmp_hanzis_6(const void *p1, const void *p2) {
|
||||
return utf16_strncmp(static_cast<const char16*>(p1),
|
||||
static_cast<const char16*>(p2), 6);
|
||||
}
|
||||
|
||||
int cmp_hanzis_7(const void *p1, const void *p2) {
|
||||
return utf16_strncmp(static_cast<const char16*>(p1),
|
||||
static_cast<const char16*>(p2), 7);
|
||||
}
|
||||
|
||||
int cmp_hanzis_8(const void *p1, const void *p2) {
|
||||
return utf16_strncmp(static_cast<const char16*>(p1),
|
||||
static_cast<const char16*>(p2), 8);
|
||||
}
|
||||
|
||||
int cmp_npre_by_score(const void *p1, const void *p2) {
|
||||
if ((static_cast<const NPredictItem*>(p1))->psb >
|
||||
(static_cast<const NPredictItem*>(p2))->psb)
|
||||
return 1;
|
||||
|
||||
if ((static_cast<const NPredictItem*>(p1))->psb <
|
||||
(static_cast<const NPredictItem*>(p2))->psb)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmp_npre_by_hislen_score(const void *p1, const void *p2) {
|
||||
if ((static_cast<const NPredictItem*>(p1))->his_len <
|
||||
(static_cast<const NPredictItem*>(p2))->his_len)
|
||||
return 1;
|
||||
|
||||
if ((static_cast<const NPredictItem*>(p1))->his_len >
|
||||
(static_cast<const NPredictItem*>(p2))->his_len)
|
||||
return -1;
|
||||
|
||||
if ((static_cast<const NPredictItem*>(p1))->psb >
|
||||
(static_cast<const NPredictItem*>(p2))->psb)
|
||||
return 1;
|
||||
|
||||
if ((static_cast<const NPredictItem*>(p1))->psb <
|
||||
(static_cast<const NPredictItem*>(p2))->psb)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
int cmp_npre_by_hanzi_score(const void *p1, const void *p2) {
|
||||
int ret_v = (utf16_strncmp((static_cast<const NPredictItem*>(p1))->pre_hzs,
|
||||
(static_cast<const NPredictItem*>(p2))->pre_hzs, kMaxPredictSize));
|
||||
if (0 != ret_v)
|
||||
return ret_v;
|
||||
|
||||
if ((static_cast<const NPredictItem*>(p1))->psb >
|
||||
(static_cast<const NPredictItem*>(p2))->psb)
|
||||
return 1;
|
||||
|
||||
if ((static_cast<const NPredictItem*>(p1))->psb <
|
||||
(static_cast<const NPredictItem*>(p2))->psb)
|
||||
return -1;
|
||||
|
||||
return 0;
|
||||
}
|
||||
|
||||
// Sorts npre_items with cmp_npre_by_hanzi_score and compacts the array in
// place so that each distinct pre_hzs string appears once (the first item of
// each run of equal strings is the one kept).
// Returns the number of items remaining at the front of the array, or 0 for
// a NULL/empty input.
size_t remove_duplicate_npre(NPredictItem *npre_items, size_t npre_num) {
  if (NULL == npre_items || 0 == npre_num)
    return 0;

  myqsort(npre_items, npre_num, sizeof(NPredictItem), cmp_npre_by_hanzi_score);

  size_t kept = 1;  // Item 0 is always retained.
  for (size_t idx = 1; idx < npre_num; ++idx) {
    const bool same_as_last_kept =
        0 == utf16_strncmp(npre_items[idx].pre_hzs,
                           npre_items[kept - 1].pre_hzs,
                           kMaxPredictSize);
    if (same_as_last_kept)
      continue;

    // Close the gap left by dropped duplicates.
    if (kept != idx)
      npre_items[kept] = npre_items[idx];
    ++kept;
  }
  return kept;
}
|
||||
|
||||
// Rounds size up to the nearest multiple of sizeof(size_t); values that are
// already a multiple are returned unchanged.
size_t align_to_size_t(size_t size) {
  const size_t unit = sizeof(size_t);
  const size_t excess = size % unit;
  return (0 == excess) ? size : size + (unit - excess);
}
|
||||
|
||||
} // namespace ime_pinyin
|
|
@ -0,0 +1,142 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__
|
||||
#define PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./spellingtrie.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Type used to identify the size of a pool, such as id pool, etc.
|
||||
typedef uint16 PoolPosType;
|
||||
|
||||
// Type used to identify a parsing mile stone in an atom dictionary.
|
||||
typedef uint16 MileStoneHandle;
|
||||
|
||||
// Type used to express a lemma and its probability score.
|
||||
typedef struct {
|
||||
size_t id:(kLemmaIdSize * 8);
|
||||
size_t lma_len:4;
|
||||
uint16 psb; // The score, the lower psb, the higher possibility.
|
||||
// For single character items, we may also need Hanzi.
|
||||
// For multiple character items, ignore it.
|
||||
char16 hanzi;
|
||||
} LmaPsbItem, *PLmaPsbItem;
|
||||
|
||||
// LmaPsbItem extended with string.
|
||||
typedef struct {
|
||||
LmaPsbItem lpi;
|
||||
char16 str[kMaxLemmaSize + 1];
|
||||
} LmaPsbStrItem, *PLmaPsbStrItem;
|
||||
|
||||
|
||||
typedef struct {
|
||||
float psb;
|
||||
char16 pre_hzs[kMaxPredictSize];
|
||||
uint16 his_len; // The length of the history used to do the prediction.
|
||||
} NPredictItem, *PNPredictItem;
|
||||
|
||||
// Parameter structure used to extend in a dictionary. All dictionaries
|
||||
// receives the same DictExtPara and a dictionary specific MileStoneHandle for
|
||||
// extending.
|
||||
//
|
||||
// When the user inputs a new character, AtomDictBase::extend_dict() will be
|
||||
// called at least once for each dictionary.
|
||||
//
|
||||
// For example, when the user inputs "wm", extend_dict() will be called twice,
|
||||
// and the DictExtPara parameter are as follows respectively:
|
||||
// 1. splids = {w, m}; splids_extended = 1; ext_len = 1; step_no = 1;
|
||||
// splid_end_split = false; id_start = wa(the first id start with 'w');
|
||||
// id_num = number of ids starting with 'w'.
|
||||
// 2. splids = {m}; splids_extended = 0; ext_len = 1; step_no = 1;
|
||||
// splid_end_split = false; id_start = wa; id_num = number of ids starting with
|
||||
// 'w'.
|
||||
//
|
||||
// For string "women", one of the cases of the DictExtPara parameter is:
|
||||
// splids = {wo, men}, splids_extended = 1, ext_len = 3 (length of "men"),
|
||||
// step_no = 4; splid_end_split = false; id_start = men, id_num = 1.
|
||||
//
|
||||
typedef struct {
|
||||
// Spelling ids for extending, there are splids_extended + 1 ids in the
|
||||
// buffer.
|
||||
// For a normal lemma, there can only be kMaxLemmaSize spelling ids in max,
|
||||
// but for a composing phrase, there can be kMaxSearchSteps spelling ids.
|
||||
uint16 splids[kMaxSearchSteps];
|
||||
|
||||
// Number of ids that have been used before. splids[splids_extended] is the
|
||||
// newly added id for the current extension.
|
||||
uint16 splids_extended;
|
||||
|
||||
// The step span of the extension. It is also the size of the string for
|
||||
// the newly added spelling id.
|
||||
uint16 ext_len;
|
||||
|
||||
// The step number for the current extension. It is also the ending position
|
||||
// in the input Pinyin string for the substring of spelling ids in splids[].
|
||||
// For example, when the user inputs "women", step_no = 4.
|
||||
// This parameter may be useful to manage the MileStoneHandle list for each
|
||||
// step. When the user deletes a character from the string, MileStoneHandle
|
||||
// objects for the steps after that character should be reset; when the
|
||||
// user begins a new string, all MileStoneHandle objects should be reset.
|
||||
uint16 step_no;
|
||||
|
||||
// Indicate whether the newly added spelling ends with a splitting character
|
||||
bool splid_end_split;
|
||||
|
||||
// If the newly added id is a half id, id_start is the first id of the
|
||||
// corresponding full ids; if the newly added id is a full id, id_start is
|
||||
// that id.
|
||||
uint16 id_start;
|
||||
|
||||
// If the newly added id is a half id, id_num is the number of corresponding
|
||||
// ids; if it is a full id, id_num == 1.
|
||||
uint16 id_num;
|
||||
}DictExtPara, *PDictExtPara;
|
||||
|
||||
bool is_system_lemma(LemmaIdType lma_id);
|
||||
bool is_user_lemma(LemmaIdType lma_id);
|
||||
bool is_composing_lemma(LemmaIdType lma_id);
|
||||
|
||||
int cmp_lpi_with_psb(const void *p1, const void *p2);
|
||||
int cmp_lpi_with_unified_psb(const void *p1, const void *p2);
|
||||
int cmp_lpi_with_id(const void *p1, const void *p2);
|
||||
int cmp_lpi_with_hanzi(const void *p1, const void *p2);
|
||||
|
||||
int cmp_lpsi_with_str(const void *p1, const void *p2);
|
||||
|
||||
int cmp_hanzis_1(const void *p1, const void *p2);
|
||||
int cmp_hanzis_2(const void *p1, const void *p2);
|
||||
int cmp_hanzis_3(const void *p1, const void *p2);
|
||||
int cmp_hanzis_4(const void *p1, const void *p2);
|
||||
int cmp_hanzis_5(const void *p1, const void *p2);
|
||||
int cmp_hanzis_6(const void *p1, const void *p2);
|
||||
int cmp_hanzis_7(const void *p1, const void *p2);
|
||||
int cmp_hanzis_8(const void *p1, const void *p2);
|
||||
|
||||
int cmp_npre_by_score(const void *p1, const void *p2);
|
||||
int cmp_npre_by_hislen_score(const void *p1, const void *p2);
|
||||
int cmp_npre_by_hanzi_score(const void *p1, const void *p2);
|
||||
|
||||
|
||||
size_t remove_duplicate_npre(NPredictItem *npre_items, size_t npre_num);
|
||||
|
||||
size_t align_to_size_t(size_t size);
|
||||
|
||||
} // namespace
|
||||
|
||||
#endif // PINYINIME_ANDPY_INCLUDE_SEARCHCOMMON_H__
|
|
@ -0,0 +1,313 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdlib.h>
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <math.h>
|
||||
#include "spellingtable.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
|
||||
const char SpellingTable::
|
||||
kNotSupportList[kNotSupportNum][kMaxSpellingSize + 1] = {"HM", "HNG", "NG"};
|
||||
|
||||
// "" is the biggest, so that all empty strings will be moved to the end
|
||||
// _eb means "empty is biggest"
|
||||
int compare_raw_spl_eb(const void* p1, const void* p2) {
|
||||
if ('\0' == (static_cast<const RawSpelling*>(p1))->str[0])
|
||||
return 1;
|
||||
|
||||
if ('\0' == (static_cast<const RawSpelling*>(p2))->str[0])
|
||||
return -1;
|
||||
|
||||
return strcmp((static_cast<const RawSpelling*>(p1))->str,
|
||||
(static_cast<const RawSpelling*>(p2))->str);
|
||||
}
|
||||
|
||||
// Returns the smallest v >= value that has no divisor in the range
// [2, floor(sqrt(v))]. For value >= 2 that is the next prime at or after
// value; 0 and 1 are returned unchanged because the divisor range is empty.
size_t get_odd_next(size_t value) {
  for (size_t candidate = value; ; candidate++) {
    // Exclusive upper bound for trial divisors.
    const size_t limit =
        static_cast<size_t>(sqrt(static_cast<double>(candidate))) + 1;

    size_t divisor = 2;
    while (divisor < limit && candidate % divisor != 0)
      divisor++;

    // The scan ran to the bound without finding a divisor.
    if (divisor >= limit)
      return candidate;
  }
}
|
||||
|
||||
// Creates an empty table with no buffers allocated. The table starts frozen,
// so put_spelling() and contain() return false until init_table() succeeds.
SpellingTable::SpellingTable() {
  raw_spellings_ = NULL;
  spelling_buf_ = NULL;
  spelling_num_ = 0;
  total_freq_ = 0;
  need_score_ = false;
  frozen_ = true;
}
|
||||
|
||||
// Releases the buffers owned by the table via free_resource().
SpellingTable::~SpellingTable() {
  this->free_resource();
}
|
||||
|
||||
// Computes the initial hash slot of spelling_str: the sum of its characters
// (at most spelling_size_ of them, stopping at the terminator), reduced
// modulo spelling_max_num_.
size_t SpellingTable::get_hash_pos(const char* spelling_str) {
  size_t char_sum = 0;
  size_t idx = 0;
  while (idx < spelling_size_ && '\0' != spelling_str[idx]) {
    char_sum += (size_t)spelling_str[idx];
    idx++;
  }

  return char_sum % spelling_max_num_;
}
|
||||
|
||||
// Returns the next probe slot after hash_pos: a fixed step of 123, wrapped
// modulo spelling_max_num_.
size_t SpellingTable::hash_pos_next(size_t hash_pos) {
  const size_t stepped = hash_pos + 123;
  return stepped % spelling_max_num_;
}
|
||||
|
||||
// Frees both table buffers and resets the pointers to NULL. Safe to call
// when nothing is allocated, since delete[] on a NULL pointer is a no-op.
void SpellingTable::free_resource() {
  delete [] raw_spellings_;
  raw_spellings_ = NULL;

  delete [] spelling_buf_;
  spelling_buf_ = NULL;
}
|
||||
|
||||
bool SpellingTable::init_table(size_t pure_spl_size, size_t spl_max_num,
|
||||
bool need_score) {
|
||||
if (pure_spl_size == 0 || spl_max_num ==0)
|
||||
return false;
|
||||
|
||||
need_score_ = need_score;
|
||||
|
||||
free_resource();
|
||||
|
||||
spelling_size_ = pure_spl_size + 1;
|
||||
if (need_score)
|
||||
spelling_size_ += 1;
|
||||
spelling_max_num_ = get_odd_next(spl_max_num);
|
||||
spelling_num_ = 0;
|
||||
|
||||
raw_spellings_ = new RawSpelling[spelling_max_num_];
|
||||
spelling_buf_ = new char[spelling_max_num_ * (spelling_size_)];
|
||||
if (NULL == raw_spellings_ || NULL == spelling_buf_) {
|
||||
free_resource();
|
||||
return false;
|
||||
}
|
||||
|
||||
memset(raw_spellings_, 0, spelling_max_num_ * sizeof(RawSpelling));
|
||||
memset(spelling_buf_, 0, spelling_max_num_ * (spelling_size_));
|
||||
frozen_ = false;
|
||||
total_freq_ = 0;
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SpellingTable::put_spelling(const char* spelling_str, double freq) {
|
||||
if (frozen_ || NULL == spelling_str)
|
||||
return false;
|
||||
|
||||
for (size_t pos = 0; pos < kNotSupportNum; pos++) {
|
||||
if (strcmp(spelling_str, kNotSupportList[pos]) == 0) {
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
total_freq_ += freq;
|
||||
|
||||
size_t hash_pos = get_hash_pos(spelling_str);
|
||||
|
||||
raw_spellings_[hash_pos].str[spelling_size_ - 1] = '\0';
|
||||
|
||||
if (strncmp(raw_spellings_[hash_pos].str, spelling_str,
|
||||
spelling_size_ - 1) == 0) {
|
||||
raw_spellings_[hash_pos].freq += freq;
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t hash_pos_ori = hash_pos;
|
||||
|
||||
while (true) {
|
||||
if (strncmp(raw_spellings_[hash_pos].str,
|
||||
spelling_str, spelling_size_ - 1) == 0) {
|
||||
raw_spellings_[hash_pos].freq += freq;
|
||||
return true;
|
||||
}
|
||||
|
||||
if ('\0' == raw_spellings_[hash_pos].str[0]) {
|
||||
raw_spellings_[hash_pos].freq += freq;
|
||||
strncpy(raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1);
|
||||
raw_spellings_[hash_pos].str[spelling_size_ - 1] = '\0';
|
||||
spelling_num_++;
|
||||
return true;
|
||||
}
|
||||
|
||||
hash_pos = hash_pos_next(hash_pos);
|
||||
if (hash_pos_ori == hash_pos)
|
||||
return false;
|
||||
}
|
||||
|
||||
// never reach here
|
||||
return false;
|
||||
}
|
||||
|
||||
bool SpellingTable::contain(const char* spelling_str) {
|
||||
if (NULL == spelling_str || NULL == spelling_buf_ || frozen_)
|
||||
return false;
|
||||
|
||||
size_t hash_pos = get_hash_pos(spelling_str);
|
||||
|
||||
if ('\0' == raw_spellings_[hash_pos].str[0])
|
||||
return false;
|
||||
|
||||
if (strncmp(raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1)
|
||||
== 0)
|
||||
return true;
|
||||
|
||||
size_t hash_pos_ori = hash_pos;
|
||||
|
||||
while (true) {
|
||||
hash_pos = hash_pos_next(hash_pos);
|
||||
if (hash_pos_ori == hash_pos)
|
||||
return false;
|
||||
|
||||
if ('\0' == raw_spellings_[hash_pos].str[0])
|
||||
return false;
|
||||
|
||||
if (strncmp(raw_spellings_[hash_pos].str, spelling_str, spelling_size_ - 1)
|
||||
== 0)
|
||||
return true;
|
||||
}
|
||||
|
||||
// never reach here
|
||||
return false;
|
||||
}
|
||||
|
||||
const char* SpellingTable::arrange(size_t *item_size, size_t *spl_num) {
|
||||
if (NULL == raw_spellings_ || NULL == spelling_buf_ ||
|
||||
NULL == item_size || NULL == spl_num)
|
||||
return NULL;
|
||||
|
||||
qsort(raw_spellings_, spelling_max_num_, sizeof(RawSpelling),
|
||||
compare_raw_spl_eb);
|
||||
|
||||
// After sorting, only the first spelling_num_ items are valid.
|
||||
// Copy them to the destination buffer.
|
||||
for (size_t pos = 0; pos < spelling_num_; pos++) {
|
||||
strncpy(spelling_buf_ + pos * spelling_size_, raw_spellings_[pos].str,
|
||||
spelling_size_);
|
||||
}
|
||||
|
||||
if (need_score_) {
|
||||
if (kPrintDebug0)
|
||||
printf("------------Spelling Possiblities--------------\n");
|
||||
|
||||
double max_score = 0;
|
||||
double min_score = 0;
|
||||
|
||||
// After sorting, only the first spelling_num_ items are valid.
|
||||
for (size_t pos = 0; pos < spelling_num_; pos++) {
|
||||
raw_spellings_[pos].freq /= total_freq_;
|
||||
if (need_score_) {
|
||||
if (0 == pos) {
|
||||
max_score = raw_spellings_[0].freq;
|
||||
min_score = max_score;
|
||||
} else {
|
||||
if (raw_spellings_[pos].freq > max_score)
|
||||
max_score = raw_spellings_[pos].freq;
|
||||
if (raw_spellings_[pos].freq < min_score)
|
||||
min_score = raw_spellings_[pos].freq;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (kPrintDebug0)
|
||||
printf("-----max psb: %f, min psb: %f\n", max_score, min_score);
|
||||
|
||||
max_score = log(max_score);
|
||||
min_score = log(min_score);
|
||||
|
||||
if (kPrintDebug0)
|
||||
printf("-----max log value: %f, min log value: %f\n",
|
||||
max_score, min_score);
|
||||
|
||||
// The absolute value of min_score is bigger than that of max_score because
|
||||
// both of them are negative after log function.
|
||||
score_amplifier_ = 1.0 * 255 / min_score;
|
||||
|
||||
double average_score = 0;
|
||||
for (size_t pos = 0; pos < spelling_num_; pos++) {
|
||||
double score = log(raw_spellings_[pos].freq) * score_amplifier_;
|
||||
assert(score >= 0);
|
||||
|
||||
average_score += score;
|
||||
|
||||
// Because of calculation precision issue, score might be a little bigger
|
||||
// than 255 after being amplified.
|
||||
if (score > 255)
|
||||
score = 255;
|
||||
char *this_spl_buf = spelling_buf_ + pos * spelling_size_;
|
||||
this_spl_buf[spelling_size_ - 1] =
|
||||
static_cast<char>((unsigned char)score);
|
||||
|
||||
if (kPrintDebug0) {
|
||||
printf("---pos:%d, %s, psb:%d\n", pos, this_spl_buf,
|
||||
(unsigned char)this_spl_buf[spelling_size_ -1]);
|
||||
}
|
||||
}
|
||||
average_score /= spelling_num_;
|
||||
assert(average_score <= 255);
|
||||
average_score_ = static_cast<uint8>(average_score);
|
||||
|
||||
if (kPrintDebug0)
|
||||
printf("\n----Score Amplifier: %f, Average Score: %d\n", score_amplifier_,
|
||||
average_score_);
|
||||
}
|
||||
|
||||
*item_size = spelling_size_;
|
||||
*spl_num = spelling_num_;
|
||||
frozen_ = true;
|
||||
return spelling_buf_;
|
||||
}
|
||||
|
||||
float SpellingTable::get_score_amplifier() {
|
||||
return static_cast<float>(score_amplifier_);
|
||||
}
|
||||
|
||||
unsigned char SpellingTable::get_average_score() {
|
||||
return average_score_;
|
||||
}
|
||||
|
||||
#endif // ___BUILD_MODEL___
|
||||
} // namespace ime_pinyin
|
|
@ -0,0 +1,111 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_SPELLINGTABLE_H__
|
||||
#define PINYINIME_INCLUDE_SPELLINGTABLE_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./dictdef.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
|
||||
const size_t kMaxSpellingSize = kMaxPinyinSize;
|
||||
|
||||
// One slot of the SpellingTable hash table.
typedef struct {
  // Spelling text; a leading '\0' marks the slot as unused.
  char str[kMaxSpellingSize + 1];
  // Accumulated occurrence count of the spelling. arrange() divides it by the
  // table's total count when scores are requested.
  double freq;
} RawSpelling, *PRawSpelling;
|
||||
|
||||
// This class is used to store spelling strings together with their
// occurrence counts.
// The length of an input spelling string should be less than or equal to the
// spelling size set by init_table(). If the input string is too long, only
// its leading chars are kept.
class SpellingTable {
 private:
  // Spellings listed here are rejected by put_spelling().
  static const size_t kNotSupportNum = 3;
  static const char kNotSupportList[kNotSupportNum][kMaxSpellingSize + 1];

  // Whether arrange() has to calculate a score for each spelling.
  bool need_score_;

  // Number of slots in raw_spellings_.
  size_t spelling_max_num_;

  // Hash table slots filled by put_spelling().
  RawSpelling *raw_spellings_;

  // Used to store spelling strings. If the spelling table needs to calculate
  // score, an extra char after each spelling string is the score.
  // An item with a lower score has a higher probability.
  char *spelling_buf_;
  // Size in bytes of one item in spelling_buf_.
  size_t spelling_size_;

  // Sum of all counts passed to put_spelling().
  double total_freq_;

  // Number of distinct spellings stored so far.
  size_t spelling_num_;

  // Factor applied to a log probability to get its score; set by arrange().
  double score_amplifier_;

  // Average of all item scores; set by arrange().
  unsigned char average_score_;

  // If frozen is true, put_spelling() and contain() must not be called.
  bool frozen_;

  size_t get_hash_pos(const char* spelling_str);
  size_t hash_pos_next(size_t hash_pos);
  void free_resource();
 public:
  SpellingTable();
  ~SpellingTable();

  // pure_spl_size is the pure maximum spelling string size. For example,
  // "zhuang" is the longest item in Pinyin, so pure_spl_size should be 6.
  // spl_max_num is the maximum number of spelling strings to store.
  // need_score is used to indicate whether the caller needs to calculate a
  // score for each spelling.
  bool init_table(size_t pure_spl_size, size_t spl_max_num, bool need_score);

  // Put a spelling string to the table.
  // It always returns false if called after arrange() without a new
  // init_table() operation.
  // spl_count is the spelling's occurrence count.
  // If the spelling is already in the table, the count is accumulated.
  bool put_spelling(const char* spelling_str, double spl_count);

  // Test whether a spelling string is in the table.
  // It always returns false when called after arrange() without a new
  // init_table() operation.
  bool contain(const char* spelling_str);

  // Sort the spelling strings and put them from the beginning of the buffer.
  // Return the pointer of the sorted spelling strings.
  // item_size and spl_num return the item size and number of spellings.
  // Because each spelling uses a '\0' as terminator, the returned item_size is
  // at least one char longer than the spl_size parameter specified by
  // init_table(). If the table is initialized to calculate score, item_size
  // will be increased by 1, and current_spl_str[item_size - 1] stores an
  // unsigned char score.
  // An item with a lower score has a higher probability.
  // Do not call put_spelling() and contain() after arrange().
  const char* arrange(size_t *item_size, size_t *spl_num);

  float get_score_amplifier();

  unsigned char get_average_score();
};
|
||||
#endif // ___BUILD_MODEL___
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_SPELLINGTABLE_H__
|
|
@ -0,0 +1,832 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include "dictdef.h"
|
||||
|
||||
#ifdef _WIN32
|
||||
#define snprintf _snprintf
|
||||
#endif
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
#include "spellingtable.h"
|
||||
#endif
|
||||
|
||||
#include "spellingtrie.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
SpellingTrie* SpellingTrie::instance_ = NULL;
|
||||
|
||||
// z/c/s is for Zh/Ch/Sh
|
||||
const char SpellingTrie::kHalfId2Sc_[kFullSplIdStart + 1] =
|
||||
"0ABCcDEFGHIJKLMNOPQRSsTUVWXYZz";
|
||||
|
||||
// Bit 0 : is it a Shengmu char?
|
||||
// Bit 1 : is it a Yunmu char? (one char is a Yunmu)
|
||||
// Bit 2 : is it enabled in ShouZiMu(first char) mode?
|
||||
unsigned char SpellingTrie::char_flags_[] = {
|
||||
// a b c d e f g
|
||||
0x02, 0x01, 0x01, 0x01, 0x02, 0x01, 0x01,
|
||||
// h i j k l m n
|
||||
0x01, 0x00, 0x01, 0x01, 0x01, 0x01, 0x01,
|
||||
// o p q r s t
|
||||
0x02, 0x01, 0x01, 0x01, 0x01, 0x01,
|
||||
// u v w x y z
|
||||
0x00, 0x00, 0x01, 0x01, 0x01, 0x01
|
||||
};
|
||||
|
||||
// qsort() comparator: orders two items by strcmp() of their leading
// NUL-terminated strings.
int compare_spl(const void* p1, const void* p2) {
  const char *lhs = static_cast<const char*>(p1);
  const char *rhs = static_cast<const char*>(p2);
  return strcmp(lhs, rhs);
}
|
||||
|
||||
// Creates an empty trie: no spelling data, no nodes, and ShouZiMu mode enabled
// for both Shengmu and Yunmu chars.
SpellingTrie::SpellingTrie() {
  // Spelling storage and its dimensions.
  spelling_buf_ = NULL;
  spelling_size_ = 0;
  spelling_num_ = 0;

  // Lookup tables and query scratch buffers.
  spl_ym_ids_ = NULL;
  ym_buf_ = NULL;
  f2h_ = NULL;
  splstr_queried_ = NULL;
  splstr16_queried_ = NULL;

  // Trie nodes.
  root_ = NULL;
  dumb_node_ = NULL;
  splitter_node_ = NULL;

  // NOTE(review): this clears the static singleton pointer on every
  // construction, not only for the instance made by get_instance().
  instance_ = NULL;

  szm_enable_shm(true);
  szm_enable_ym(true);

#ifdef ___BUILD_MODEL___
  node_num_ = 0;
#endif
}
|
||||
|
||||
// Releases every buffer and trie node owned by this object.
SpellingTrie::~SpellingTrie() {
  // delete / delete [] on a null pointer is a no-op, so no NULL tests needed.
  delete [] spelling_buf_;
  delete [] splstr_queried_;
  delete [] splstr16_queried_;
  delete [] spl_ym_ids_;

  if (NULL != root_) {
    free_son_trie(root_);
    delete root_;
  }

  // construct() allocates these two nodes with scalar new, so they must be
  // released with scalar delete (delete [] here is undefined behavior).
  delete dumb_node_;
  delete splitter_node_;

  // Only unregister the singleton pointer. Deleting instance_ from inside the
  // destructor would destroy the singleton again (re-entering this destructor
  // when this object is the singleton).
  if (this == instance_)
    instance_ = NULL;

  delete [] ym_buf_;
  delete [] f2h_;
}
|
||||
|
||||
// Checks whether *splid is usable and, if needed, rewrites it.
// Returns false for a NULL pointer, id 0, or a half id whose char is neither
// enabled in ShouZiMu mode nor a Yunmu char. A disabled Yunmu half id is
// replaced by the first full id it maps to.
bool SpellingTrie::if_valid_id_update(uint16 *splid) const {
  if (NULL == splid || 0 == *splid)
    return false;

  // Full ids are accepted unchanged.
  if (*splid >= kFullSplIdStart)
    return true;

  const char half_ch = kHalfId2Sc_[*splid];

  // Chars above 'Z' are the lower-case entries standing for Zh/Ch/Sh.
  if (half_ch > 'Z')
    return true;

  if (szm_is_enabled(half_ch))
    return true;

  if (is_yunmu_char(half_ch)) {
    assert(h2f_num_[*splid] > 0);
    *splid = h2f_start_[*splid];
    return true;
  }

  return false;
}
|
||||
|
||||
// A half id is any non-zero id below kFullSplIdStart.
bool SpellingTrie::is_half_id(uint16 splid) const {
  return 0 != splid && splid < kFullSplIdStart;
}
|
||||
|
||||
// A full id lies in [kFullSplIdStart, kFullSplIdStart + spelling_num_).
bool SpellingTrie::is_full_id(uint16 splid) const {
  return splid >= kFullSplIdStart && splid < kFullSplIdStart + spelling_num_;
}
|
||||
|
||||
// Reports whether half_id can stand for full_id: either it is the half id the
// full id maps to, or it is the upper-case counterpart of that half id.
bool SpellingTrie::half_full_compatible(uint16 half_id, uint16 full_id) const {
  const uint16 mapped_half = full_to_half(full_id);
  if (mapped_half == half_id)
    return true;

  // &~0x20 is used to convert the char to upper case.
  // So that Zh/Ch/Sh (whose char is z/c/s) can be matched with Z/C/S.
  const char full_side = (kHalfId2Sc_[mapped_half] & (~0x20));
  const char half_side = kHalfId2Sc_[half_id];
  return full_side == half_side;
}
|
||||
|
||||
// Reports whether splid is a half id whose char carries the Yunmu flag.
bool SpellingTrie::is_half_id_yunmu(uint16 splid) const {
  const bool in_half_range = (0 != splid && splid < kFullSplIdStart);
  if (!in_half_range)
    return false;

  const char half_ch = kHalfId2Sc_[splid];

  // A char >= 'a' means the half id is one of Zh/Ch/Sh, never a Yunmu.
  if (half_ch >= 'a')
    return false;

  return char_flags_[half_ch - 'A'] & kHalfIdYunmuMask;
}
|
||||
|
||||
bool SpellingTrie::is_shengmu_char(char ch) const {
|
||||
return char_flags_[ch - 'A'] & kHalfIdShengmuMask;
|
||||
}
|
||||
|
||||
bool SpellingTrie::is_yunmu_char(char ch) const {
|
||||
return char_flags_[ch - 'A'] & kHalfIdYunmuMask;
|
||||
}
|
||||
|
||||
bool SpellingTrie::is_szm_char(char ch) const {
|
||||
return is_shengmu_char(ch) || is_yunmu_char(ch);
|
||||
}
|
||||
|
||||
bool SpellingTrie::szm_is_enabled(char ch) const {
|
||||
return char_flags_[ch - 'A'] & kHalfIdSzmMask;
|
||||
}
|
||||
|
||||
void SpellingTrie::szm_enable_shm(bool enable) {
|
||||
if (enable) {
|
||||
for (char ch = 'A'; ch <= 'Z'; ch++) {
|
||||
if (is_shengmu_char(ch))
|
||||
char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
|
||||
}
|
||||
} else {
|
||||
for (char ch = 'A'; ch <= 'Z'; ch++) {
|
||||
if (is_shengmu_char(ch))
|
||||
char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void SpellingTrie::szm_enable_ym(bool enable) {
|
||||
if (enable) {
|
||||
for (char ch = 'A'; ch <= 'Z'; ch++) {
|
||||
if (is_yunmu_char(ch))
|
||||
char_flags_[ch - 'A'] = char_flags_[ch - 'A'] | kHalfIdSzmMask;
|
||||
}
|
||||
} else {
|
||||
for (char ch = 'A'; ch <= 'Z'; ch++) {
|
||||
if (is_yunmu_char(ch))
|
||||
char_flags_[ch - 'A'] = char_flags_[ch - 'A'] & (kHalfIdSzmMask ^ 0xff);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
bool SpellingTrie::is_szm_enabled(char ch) const {
|
||||
return char_flags_[ch - 'A'] & kHalfIdSzmMask;
|
||||
}
|
||||
|
||||
// Returns the singleton as a pointer to const, creating it on first use.
const SpellingTrie* SpellingTrie::get_cpinstance() {
  const SpellingTrie &trie = get_instance();
  return &trie;
}
|
||||
|
||||
// Returns the singleton, creating it on first use. No locking is done here.
SpellingTrie& SpellingTrie::get_instance() {
  if (NULL != instance_)
    return *instance_;

  instance_ = new SpellingTrie();
  return *instance_;
}
|
||||
|
||||
// Number of full ids covered by half_id; 0 when the trie is not built or
// half_id is not below kFullSplIdStart.
uint16 SpellingTrie::half2full_num(uint16 half_id) const {
  const bool usable = (NULL != root_ && half_id < kFullSplIdStart);
  return usable ? h2f_num_[half_id] : 0;
}
|
||||
|
||||
// Stores the first full id covered by half_id in *spl_id_start and returns how
// many full ids it covers. Returns 0 (leaving *spl_id_start untouched) when
// spl_id_start is NULL, the trie is not built, or half_id is not below
// kFullSplIdStart.
uint16 SpellingTrie::half_to_full(uint16 half_id, uint16 *spl_id_start) const {
  if (NULL == root_ || NULL == spl_id_start)
    return 0;
  if (half_id >= kFullSplIdStart)
    return 0;

  const uint16 covered = h2f_num_[half_id];
  *spl_id_start = h2f_start_[half_id];
  return covered;
}
|
||||
|
||||
// Maps a full spelling id to the half id recorded for it in f2h_.
// Returns 0 when the trie or the mapping is not built, or when full_id is
// outside [kFullSplIdStart, kFullSplIdStart + spelling_num_).
uint16 SpellingTrie::full_to_half(uint16 full_id) const {
  // f2h_ holds exactly spelling_num_ entries, so the upper bound is exclusive
  // (the same range is_full_id() accepts).
  if (NULL == root_ || NULL == f2h_ || full_id < kFullSplIdStart ||
      full_id >= spelling_num_ + kFullSplIdStart)
    return 0;

  return f2h_[full_id - kFullSplIdStart];
}
|
||||
|
||||
// Recursively frees the son arrays below node. The node itself is left for
// the caller to release.
void SpellingTrie::free_son_trie(SpellingNode* node) {
  if (NULL == node)
    return;

  SpellingNode *sons = node->first_son;

  // Release the grandchildren first, then the array holding the sons.
  for (size_t idx = 0; idx < node->num_of_son; idx++)
    free_son_trie(sons + idx);

  if (NULL != sons)
    delete [] sons;
}
|
||||
|
||||
bool SpellingTrie::construct(const char* spelling_arr, size_t item_size,
|
||||
size_t item_num, float score_amplifier,
|
||||
unsigned char average_score) {
|
||||
if (spelling_arr == NULL)
|
||||
return false;
|
||||
|
||||
memset(h2f_start_, 0, sizeof(uint16) * kFullSplIdStart);
|
||||
memset(h2f_num_, 0, sizeof(uint16) * kFullSplIdStart);
|
||||
|
||||
// If the arr is the same as the buf, means this function is called by
|
||||
// load_table(), the table data are ready; otherwise the array should be
|
||||
// saved.
|
||||
if (spelling_arr != spelling_buf_) {
|
||||
if (NULL != spelling_buf_)
|
||||
delete [] spelling_buf_;
|
||||
spelling_buf_ = new char[item_size * item_num];
|
||||
if (NULL == spelling_buf_)
|
||||
return false;
|
||||
memcpy(spelling_buf_, spelling_arr, sizeof(char) * item_size * item_num);
|
||||
}
|
||||
|
||||
spelling_size_ = item_size;
|
||||
spelling_num_ = item_num;
|
||||
|
||||
score_amplifier_ = score_amplifier;
|
||||
average_score_ = average_score;
|
||||
|
||||
if (NULL != splstr_queried_)
|
||||
delete [] splstr_queried_;
|
||||
splstr_queried_ = new char[spelling_size_];
|
||||
if (NULL == splstr_queried_)
|
||||
return false;
|
||||
|
||||
if (NULL != splstr16_queried_)
|
||||
delete [] splstr16_queried_;
|
||||
splstr16_queried_ = new char16[spelling_size_];
|
||||
if (NULL == splstr16_queried_)
|
||||
return false;
|
||||
|
||||
// First, sort the buf to ensure they are in ascendant order
|
||||
qsort(spelling_buf_, spelling_num_, spelling_size_, compare_spl);
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
node_num_ = 1;
|
||||
#endif
|
||||
|
||||
root_ = new SpellingNode();
|
||||
memset(root_, 0, sizeof(SpellingNode));
|
||||
|
||||
dumb_node_ = new SpellingNode();
|
||||
memset(dumb_node_, 0, sizeof(SpellingNode));
|
||||
dumb_node_->score = average_score_;
|
||||
|
||||
splitter_node_ = new SpellingNode();
|
||||
memset(splitter_node_, 0, sizeof(SpellingNode));
|
||||
splitter_node_->score = average_score_;
|
||||
|
||||
memset(level1_sons_, 0, sizeof(SpellingNode*) * kValidSplCharNum);
|
||||
|
||||
root_->first_son = construct_spellings_subset(0, spelling_num_, 0, root_);
|
||||
|
||||
// Root's score should be cleared.
|
||||
root_->score = 0;
|
||||
|
||||
if (NULL == root_->first_son)
|
||||
return false;
|
||||
|
||||
h2f_start_[0] = h2f_num_[0] = 0;
|
||||
|
||||
if (!build_f2h())
|
||||
return false;
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
if (kPrintDebug0) {
|
||||
printf("---SpellingTrie Nodes: %d\n", (int)node_num_);
|
||||
}
|
||||
return build_ym_info();
|
||||
#else
|
||||
return true;
|
||||
#endif
|
||||
}
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
const char* SpellingTrie::get_ym_str(const char *spl_str) {
|
||||
bool start_ZCS = false;
|
||||
if (is_shengmu_char(*spl_str)) {
|
||||
if ('Z' == *spl_str || 'C' == *spl_str || 'S' == *spl_str)
|
||||
start_ZCS = true;
|
||||
spl_str += 1;
|
||||
if (start_ZCS && 'h' == *spl_str)
|
||||
spl_str += 1;
|
||||
}
|
||||
return spl_str;
|
||||
}
|
||||
|
||||
bool SpellingTrie::build_ym_info() {
|
||||
bool sucess;
|
||||
SpellingTable *spl_table = new SpellingTable();
|
||||
|
||||
sucess = spl_table->init_table(kMaxPinyinSize - 1, 2 * kMaxYmNum, false);
|
||||
assert(sucess);
|
||||
|
||||
for (uint16 pos = 0; pos < spelling_num_; pos++) {
|
||||
const char *spl_str = spelling_buf_ + spelling_size_ * pos;
|
||||
spl_str = get_ym_str(spl_str);
|
||||
if ('\0' != spl_str[0]) {
|
||||
sucess = spl_table->put_spelling(spl_str, 0);
|
||||
assert(sucess);
|
||||
}
|
||||
}
|
||||
|
||||
size_t ym_item_size; // '\0' is included
|
||||
size_t ym_num;
|
||||
const char* ym_buf;
|
||||
ym_buf = spl_table->arrange(&ym_item_size, &ym_num);
|
||||
|
||||
if (NULL != ym_buf_)
|
||||
delete [] ym_buf_;
|
||||
ym_buf_ = new char[ym_item_size * ym_num];
|
||||
if (NULL == ym_buf_) {
|
||||
delete spl_table;
|
||||
return false;
|
||||
}
|
||||
|
||||
memcpy(ym_buf_, ym_buf, sizeof(char) * ym_item_size * ym_num);
|
||||
ym_size_ = ym_item_size;
|
||||
ym_num_ = ym_num;
|
||||
|
||||
delete spl_table;
|
||||
|
||||
// Generate the maping from the spelling ids to the Yunmu ids.
|
||||
if (spl_ym_ids_)
|
||||
delete spl_ym_ids_;
|
||||
spl_ym_ids_ = new uint8[spelling_num_ + kFullSplIdStart];
|
||||
if (NULL == spl_ym_ids_)
|
||||
return false;
|
||||
|
||||
memset(spl_ym_ids_, 0, sizeof(uint8) * (spelling_num_ + kFullSplIdStart));
|
||||
|
||||
for (uint16 id = 1; id < spelling_num_ + kFullSplIdStart; id++) {
|
||||
const char *str = get_spelling_str(id);
|
||||
|
||||
str = get_ym_str(str);
|
||||
if ('\0' != str[0]) {
|
||||
uint8 ym_id = get_ym_id(str);
|
||||
spl_ym_ids_[id] = ym_id;
|
||||
assert(ym_id > 0);
|
||||
} else {
|
||||
spl_ym_ids_[id] = 0;
|
||||
}
|
||||
}
|
||||
return true;
|
||||
}
|
||||
#endif
|
||||
|
||||
// Builds the array of son nodes for the spellings in [item_start, item_end)
// of the sorted spelling_buf_, looking at the char at index `level`.
// One son is created per run of items sharing the same char at that index;
// deeper levels are built recursively.
// On success parent->num_of_son and parent->score (the smallest son score)
// are updated and the new array is returned. Returns NULL, leaving parent
// untouched, when level is past the item size, the range is empty, or parent
// is NULL.
SpellingNode* SpellingTrie::construct_spellings_subset(
    size_t item_start, size_t item_end, size_t level, SpellingNode* parent) {
  if (level >= spelling_size_ || item_end <= item_start || NULL == parent)
    return NULL;

  SpellingNode *first_son = NULL;
  uint16 num_of_son = 0;
  unsigned char min_son_score = 255;

  const char *spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
  char char_for_node = spelling_last_start[level];
  assert((char_for_node >= 'A' && char_for_node <= 'Z') ||
         'h' == char_for_node);

  // Scan the array to find how many sons
  for (size_t i = item_start + 1; i < item_end; i++) {
    const char *spelling_current = spelling_buf_ + spelling_size_ * i;
    char char_current = spelling_current[level];
    if (char_current != char_for_node) {
      num_of_son++;
      char_for_node = char_current;
    }
  }
  // Count the last run, which the loop above never closes.
  num_of_son++;

  // Allocate memory
#ifdef ___BUILD_MODEL___
  node_num_ += num_of_son;
#endif
  first_son = new SpellingNode[num_of_son];
  memset(first_son, 0, sizeof(SpellingNode)*num_of_son);

  // Now begin construct tree
  size_t son_pos = 0;

  spelling_last_start = spelling_buf_ + spelling_size_ * item_start;
  char_for_node = spelling_last_start[level];

  // True when the first item of the current run ends right after this char,
  // i.e. the node itself is a complete spelling.
  bool spelling_endable = true;
  if (spelling_last_start[level + 1] != '\0')
    spelling_endable = false;

  // Index of the first item of the current run.
  size_t item_start_next = item_start;

  for (size_t i = item_start + 1; i < item_end; i++) {
    const char *spelling_current = spelling_buf_ + spelling_size_ * i;
    char char_current = spelling_current[level];
    assert(is_valid_spl_char(char_current));

    if (char_current != char_for_node) {
      // Construct a node
      SpellingNode *node_current = first_son + son_pos;
      node_current->char_this_node = char_for_node;

      // For quick search in the first level
      if (0 == level)
        level1_sons_[char_for_node - 'A'] = node_current;

      if (spelling_endable) {
        node_current->spelling_idx = kFullSplIdStart + item_start_next;
      }

      if (spelling_last_start[level + 1] != '\0' || i - item_start_next > 1) {
        // The run has longer spellings: build the sub-trie, skipping the
        // first item when it is the spelling that ends at this node.
        size_t real_start = item_start_next;
        if (spelling_last_start[level + 1] == '\0')
          real_start++;

        node_current->first_son =
            construct_spellings_subset(real_start, i, level + 1,
                                       node_current);

        // The recursive call stored the smallest son score in this node;
        // also take the score of the spelling ending here into account.
        if (real_start == item_start_next + 1) {
          uint16 score_this = static_cast<unsigned char>(
              spelling_last_start[spelling_size_ - 1]);
          if (score_this < node_current->score)
            node_current->score = score_this;
        }
      } else {
        // Leaf: the score is the last byte of the item.
        node_current->first_son = NULL;
        node_current->score = static_cast<unsigned char>(
            spelling_last_start[spelling_size_ - 1]);
      }

      if (node_current->score < min_son_score)
        min_son_score = node_current->score;

      // Half ids: a first-level char gets char - 'A' + 1, shifted by one
      // after 'C' and again after 'S'; a second-level 'h' under C/S/Z gets
      // the Ch/Sh/Zh id. This overwrites any full id stored above.
      bool is_half = false;
      if (level == 0 && is_szm_char(char_for_node)) {
        node_current->spelling_idx =
          static_cast<uint16>(char_for_node - 'A' + 1);

        if (char_for_node > 'C')
          node_current->spelling_idx++;
        if (char_for_node > 'S')
          node_current->spelling_idx++;

        h2f_num_[node_current->spelling_idx] = i - item_start_next;
        is_half = true;
      } else if (level == 1 && char_for_node == 'h') {
        char ch_level0 = spelling_last_start[0];
        uint16 part_id = 0;
        if (ch_level0 == 'C')
          part_id = 'C' - 'A' + 1 + 1;
        else if (ch_level0 == 'S')
          part_id = 'S' - 'A' + 1 + 2;
        else if (ch_level0 == 'Z')
          part_id = 'Z' - 'A' + 1 + 3;
        if (0 != part_id) {
          node_current->spelling_idx = part_id;
          h2f_num_[node_current->spelling_idx] = i - item_start_next;
          is_half = true;
        }
      }

      // Record the first full id covered by this half id.
      if (is_half) {
        if (h2f_num_[node_current->spelling_idx] > 0)
          h2f_start_[node_current->spelling_idx] =
            item_start_next + kFullSplIdStart;
        else
          h2f_start_[node_current->spelling_idx] = 0;
      }

      // for next sibling
      spelling_last_start = spelling_current;
      char_for_node = char_current;
      item_start_next = i;
      spelling_endable = true;
      if (spelling_current[level + 1] != '\0')
        spelling_endable = false;

      son_pos++;
    }
  }

  // the last one
  SpellingNode *node_current = first_son + son_pos;
  node_current->char_this_node = char_for_node;

  // For quick search in the first level
  if (0 == level)
    level1_sons_[char_for_node - 'A'] = node_current;

  if (spelling_endable) {
    node_current->spelling_idx = kFullSplIdStart + item_start_next;
  }

  if (spelling_last_start[level + 1] != '\0' ||
      item_end - item_start_next > 1) {
    size_t real_start = item_start_next;
    if (spelling_last_start[level + 1] == '\0')
      real_start++;

    node_current->first_son =
        construct_spellings_subset(real_start, item_end, level + 1,
                                   node_current);

    if (real_start == item_start_next + 1) {
      uint16 score_this = static_cast<unsigned char>(
          spelling_last_start[spelling_size_ - 1]);
      if (score_this < node_current->score)
        node_current->score = score_this;
    }
  } else {
    node_current->first_son = NULL;
    node_current->score = static_cast<unsigned char>(
        spelling_last_start[spelling_size_ - 1]);
  }

  if (node_current->score < min_son_score)
    min_son_score = node_current->score;

  assert(son_pos + 1 == num_of_son);

  // NOTE(review): the loop above tests is_szm_char() here, while this last
  // node tests szm_is_enabled() - confirm whether the difference is intended.
  bool is_half = false;
  if (level == 0 && szm_is_enabled(char_for_node)) {
    node_current->spelling_idx = static_cast<uint16>(char_for_node - 'A' + 1);

    if (char_for_node > 'C')
      node_current->spelling_idx++;
    if (char_for_node > 'S')
      node_current->spelling_idx++;

    h2f_num_[node_current->spelling_idx] = item_end - item_start_next;
    is_half = true;
  } else if (level == 1 && char_for_node == 'h') {
    char ch_level0 = spelling_last_start[0];
    uint16 part_id = 0;
    if (ch_level0 == 'C')
      part_id = 'C' - 'A' + 1 + 1;
    else if (ch_level0 == 'S')
      part_id = 'S' - 'A' + 1 + 2;
    else if (ch_level0 == 'Z')
      part_id = 'Z' - 'A' + 1 + 3;
    if (0 != part_id) {
      node_current->spelling_idx = part_id;
      h2f_num_[node_current->spelling_idx] = item_end - item_start_next;
      is_half = true;
    }
  }
  if (is_half) {
    if (h2f_num_[node_current->spelling_idx] > 0)
      h2f_start_[node_current->spelling_idx] =
        item_start_next + kFullSplIdStart;
    else
      h2f_start_[node_current->spelling_idx] = 0;
  }

  parent->num_of_son = num_of_son;
  parent->score = min_son_score;
  return first_son;
}
|
||||
|
||||
bool SpellingTrie::save_spl_trie(FILE *fp) {
|
||||
if (NULL == fp || NULL == spelling_buf_)
|
||||
return false;
|
||||
|
||||
if (fwrite(&spelling_size_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(&spelling_num_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(&score_amplifier_, sizeof(float), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(&average_score_, sizeof(unsigned char), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fwrite(spelling_buf_, sizeof(char) * spelling_size_,
|
||||
spelling_num_, fp) != spelling_num_)
|
||||
return false;
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
bool SpellingTrie::load_spl_trie(FILE *fp) {
|
||||
if (NULL == fp)
|
||||
return false;
|
||||
|
||||
if (fread(&spelling_size_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fread(&spelling_num_, sizeof(uint32), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fread(&score_amplifier_, sizeof(float), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (fread(&average_score_, sizeof(unsigned char), 1, fp) != 1)
|
||||
return false;
|
||||
|
||||
if (NULL != spelling_buf_)
|
||||
delete [] spelling_buf_;
|
||||
|
||||
spelling_buf_ = new char[spelling_size_ * spelling_num_];
|
||||
if (NULL == spelling_buf_)
|
||||
return false;
|
||||
|
||||
if (fread(spelling_buf_, sizeof(char) * spelling_size_,
|
||||
spelling_num_, fp) != spelling_num_)
|
||||
return false;
|
||||
|
||||
return construct(spelling_buf_, spelling_size_, spelling_num_,
|
||||
score_amplifier_, average_score_);
|
||||
}
|
||||
|
||||
bool SpellingTrie::build_f2h() {
|
||||
if (NULL != f2h_)
|
||||
delete [] f2h_;
|
||||
f2h_ = new uint16[spelling_num_];
|
||||
if (NULL == f2h_)
|
||||
return false;
|
||||
|
||||
for (uint16 hid = 0; hid < kFullSplIdStart; hid++) {
|
||||
for (uint16 fid = h2f_start_[hid];
|
||||
fid < h2f_start_[hid] + h2f_num_[hid]; fid++)
|
||||
f2h_[fid - kFullSplIdStart] = hid;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
size_t SpellingTrie::get_spelling_num() {
|
||||
return spelling_num_;
|
||||
}
|
||||
|
||||
// Linear search of ym_buf_ for ym_str. Returns the 1-based position of the
// match, or 0 when ym_str is NULL, the Yunmu table is not built, or nothing
// matches.
uint8 SpellingTrie::get_ym_id(const char *ym_str) {
  if (NULL == ym_buf_ || NULL == ym_str)
    return 0;

  uint8 idx = 0;
  while (idx < ym_num_) {
    const char *candidate = ym_buf_ + ym_size_ * idx;
    if (0 == strcmp(candidate, ym_str))
      return idx + 1;
    idx++;
  }

  return 0;
}
|
||||
|
||||
// Returns the spelling string of splid in the shared buffer splstr_queried_,
// which the next call overwrites.
// Full ids yield the stored spelling, the three digraph half ids yield
// "Ch"/"Sh"/"Zh", and any other half id yields a single upper-case letter.
const char* SpellingTrie::get_spelling_str(uint16 splid) {
  splstr_queried_[0] = '\0';

  if (splid >= kFullSplIdStart) {
    splid -= kFullSplIdStart;
    snprintf(splstr_queried_, spelling_size_, "%s",
             spelling_buf_ + splid * spelling_size_);
    return splstr_queried_;
  }

  const char *digraph = NULL;
  switch (splid) {
    case 'C' - 'A' + 1 + 1:
      digraph = "Ch";
      break;
    case 'S' - 'A' + 1 + 2:
      digraph = "Sh";
      break;
    case 'Z' - 'A' + 1 + 3:
      digraph = "Zh";
      break;
    default:
      break;
  }

  if (NULL != digraph) {
    snprintf(splstr_queried_, spelling_size_, "%s", digraph);
  } else {
    // Undo the one-step shifts that the Ch and Sh ids add after 'C' and 'S'.
    if (splid > 'C' - 'A' + 1)
      splid--;
    if (splid > 'S' - 'A' + 1)
      splid--;
    splstr_queried_[0] = 'A' + splid - 1;
    splstr_queried_[1] = '\0';
  }
  return splstr_queried_;
}
|
||||
|
||||
// char16 counterpart of get_spelling_str(): fills the internal buffer
// splstr16_queried_ and returns it; the next call overwrites the content.
//  - Full ids copy all spelling_size_ bytes of the table entry, widened
//    to char16 (this includes whatever follows the terminator).
//  - The half ids of Ch, Sh and Zh yield "Ch", "Sh" and "Zh".
//  - Any other half id yields a single upper-case letter.
const char16* SpellingTrie::get_spelling_str16(uint16 splid) {
  splstr16_queried_[0] = '\0';

  if (splid >= kFullSplIdStart) {
    splid -= kFullSplIdStart;
    const char *entry = spelling_buf_ + splid * spelling_size_;
    for (size_t i = 0; i < spelling_size_; i++) {
      splstr16_queried_[i] = static_cast<char16>(entry[i]);
    }
    return splstr16_queried_;
  }

  // Leading letter of a two-letter half id, or '\0' for a one-letter id.
  char lead = '\0';
  switch (splid) {
    case 'C' - 'A' + 1 + 1:
      lead = 'C';
      break;
    case 'S' - 'A' + 1 + 2:
      lead = 'S';
      break;
    case 'Z' - 'A' + 1 + 3:
      lead = 'Z';
      break;
    default:
      break;
  }

  if ('\0' != lead) {
    splstr16_queried_[0] = static_cast<char16>(lead);
    splstr16_queried_[1] = static_cast<char16>('h');
    splstr16_queried_[2] = static_cast<char16>('\0');
    return splstr16_queried_;
  }

  // Undo the shift introduced by the Ch and Sh slots to get the letter index.
  if (splid > 'C' - 'A' + 1) {
    splid--;
  }
  if (splid > 'S' - 'A' + 1) {
    splid--;
  }
  splstr16_queried_[0] = 'A' + splid - 1;
  splstr16_queried_[1] = '\0';

  return splstr16_queried_;
}
|
||||
|
||||
// Writes the spelling for splid into the caller's buffer splstr16,
// terminated with '\0', and returns its length without the terminator.
// Returns 0 when splstr16 is NULL or splstr16_len is smaller than
// kMaxPinyinSize + 1, and also when a full-id table entry has no
// terminator within its first kMaxPinyinSize + 1 characters.
size_t SpellingTrie::get_spelling_str16(uint16 splid, char16 *splstr16,
                                        size_t splstr16_len) {
  if (NULL == splstr16 || splstr16_len < kMaxPinyinSize + 1) return 0;

  if (splid >= kFullSplIdStart) {
    splid -= kFullSplIdStart;
    const char *entry = spelling_buf_ + splid * spelling_size_;
    size_t len = 0;
    while (len <= kMaxPinyinSize) {
      splstr16[len] = static_cast<char16>(entry[len]);
      if (static_cast<char16>('\0') == splstr16[len]) {
        return len;
      }
      len++;
    }
    // The original marks this path as not reachable.
    return 0;
  }

  // Leading letter of a two-letter half id, or '\0' for a one-letter id.
  char lead = '\0';
  switch (splid) {
    case 'C' - 'A' + 1 + 1:
      lead = 'C';
      break;
    case 'S' - 'A' + 1 + 2:
      lead = 'S';
      break;
    case 'Z' - 'A' + 1 + 3:
      lead = 'Z';
      break;
    default:
      break;
  }

  if ('\0' != lead) {
    splstr16[0] = static_cast<char16>(lead);
    splstr16[1] = static_cast<char16>('h');
    splstr16[2] = static_cast<char16>('\0');
    return 2;
  }

  // Undo the shift introduced by the Ch and Sh slots to get the letter index.
  if (splid > 'C' - 'A' + 1) {
    splid--;
  }
  if (splid > 'S' - 'A' + 1) {
    splid--;
  }
  splstr16[0] = 'A' + splid - 1;
  splstr16[1] = '\0';
  return 1;
}
|
||||
|
||||
} // namespace ime_pinyin
|
|
@ -0,0 +1,258 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_SPELLINGTRIE_H__
|
||||
#define PINYINIME_INCLUDE_SPELLINGTRIE_H__
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include "./dictdef.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
static const unsigned short kFullSplIdStart = kHalfSpellingIdNum + 1;
|
||||
|
||||
// Node used for the trie of spellings
|
||||
struct SpellingNode {
|
||||
SpellingNode *first_son;
|
||||
// The spelling id for each node. If you need more bits to store
|
||||
// spelling id, please adjust this structure.
|
||||
uint16 spelling_idx:11;
|
||||
uint16 num_of_son:5;
|
||||
char char_this_node;
|
||||
unsigned char score;
|
||||
};
|
||||
|
||||
class SpellingTrie {
|
||||
private:
|
||||
static const int kMaxYmNum = 64;
|
||||
static const size_t kValidSplCharNum = 26;
|
||||
|
||||
static const uint16 kHalfIdShengmuMask = 0x01;
|
||||
static const uint16 kHalfIdYunmuMask = 0x02;
|
||||
static const uint16 kHalfIdSzmMask = 0x04;
|
||||
|
||||
// Map from half spelling id to single char.
|
||||
// For half ids of Zh/Ch/Sh, map to z/c/s (low case) respectively.
|
||||
// For example, 1 to 'A', 2 to 'B', 3 to 'C', 4 to 'c', 5 to 'D', ...,
|
||||
// 28 to 'Z', 29 to 'z'.
|
||||
// [0] is not used to achieve better efficiency.
|
||||
static const char kHalfId2Sc_[kFullSplIdStart + 1];
|
||||
|
||||
static unsigned char char_flags_[];
|
||||
static SpellingTrie* instance_;
|
||||
|
||||
// The spelling table
|
||||
char *spelling_buf_;
|
||||
|
||||
// The size of longest spelling string, includes '\0' and an extra char to
|
||||
// store score. For example, "zhuang" is the longgest item in Pinyin list,
|
||||
// so spelling_size_ is 8.
|
||||
// Structure: The string ended with '\0' + score char.
|
||||
// An item with a lower score has a higher probability.
|
||||
uint32 spelling_size_;
|
||||
|
||||
// Number of full spelling ids.
|
||||
uint32 spelling_num_;
|
||||
|
||||
float score_amplifier_;
|
||||
unsigned char average_score_;
|
||||
|
||||
// The Yunmu id list for the spelling ids (for half ids of Shengmu,
|
||||
// the Yunmu id is 0).
|
||||
// The length of the list is spelling_num_ + kFullSplIdStart,
|
||||
// so that spl_ym_ids_[splid] is the Yunmu id of the splid.
|
||||
uint8 *spl_ym_ids_;
|
||||
|
||||
// The Yunmu table.
|
||||
// Each Yunmu will be assigned with Yunmu id from 1.
|
||||
char *ym_buf_;
|
||||
size_t ym_size_; // The size of longest Yunmu string, '\0'included.
|
||||
size_t ym_num_;
|
||||
|
||||
// The spelling string just queried
|
||||
char *splstr_queried_;
|
||||
|
||||
// The spelling string just queried
|
||||
char16 *splstr16_queried_;
|
||||
|
||||
// The root node of the spelling tree
|
||||
SpellingNode* root_;
|
||||
|
||||
// If a none qwerty key such as a fnction key like ENTER is given, this node
|
||||
// will be used to indicate that this is not a QWERTY node.
|
||||
SpellingNode* dumb_node_;
|
||||
|
||||
// If a splitter key is pressed, this node will be used to indicate that this
|
||||
// is a splitter key.
|
||||
SpellingNode* splitter_node_;
|
||||
|
||||
// Used to get the first level sons.
|
||||
SpellingNode* level1_sons_[kValidSplCharNum];
|
||||
|
||||
// The full spl_id range for specific half id.
|
||||
// h2f means half to full.
|
||||
// A half id can be a ShouZiMu id (id to represent the first char of a full
|
||||
// spelling, including Shengmu and Yunmu), or id of zh/ch/sh.
|
||||
// [1..kFullSplIdStart-1] is the arrange of half id.
|
||||
uint16 h2f_start_[kFullSplIdStart];
|
||||
uint16 h2f_num_[kFullSplIdStart];
|
||||
|
||||
// Map from full id to half id.
|
||||
uint16 *f2h_;
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
// How many node used to build the trie.
|
||||
size_t node_num_;
|
||||
#endif
|
||||
|
||||
SpellingTrie();
|
||||
|
||||
void free_son_trie(SpellingNode* node);
|
||||
|
||||
// Construct a subtree using a subset of the spelling array (from
|
||||
// item_star to item_end).
|
||||
// Member spelliing_buf_ and spelling_size_ should be valid.
|
||||
// parent is used to update its num_of_son and score.
|
||||
SpellingNode* construct_spellings_subset(size_t item_start, size_t item_end,
|
||||
size_t level, SpellingNode *parent);
|
||||
bool build_f2h();
|
||||
|
||||
// The caller should guarantee ch >= 'A' && ch <= 'Z'
|
||||
bool is_shengmu_char(char ch) const;
|
||||
|
||||
// The caller should guarantee ch >= 'A' && ch <= 'Z'
|
||||
bool is_yunmu_char(char ch) const;
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
// Given a spelling string, return its Yunmu string.
|
||||
// The caller guaratees spl_str is valid.
|
||||
const char* get_ym_str(const char *spl_str);
|
||||
|
||||
// Build the Yunmu list, and the mapping relation between the full ids and the
|
||||
// Yunmu ids. This functin is called after the spelling trie is built.
|
||||
bool build_ym_info();
|
||||
#endif
|
||||
|
||||
friend class SpellingParser;
|
||||
friend class SmartSplParser;
|
||||
friend class SmartSplParser2;
|
||||
|
||||
public:
|
||||
~SpellingTrie();
|
||||
|
||||
inline static bool is_valid_spl_char(char ch) {
|
||||
return (ch >= 'a' && ch <= 'z') || (ch >= 'A' && ch <= 'Z');
|
||||
}
|
||||
|
||||
// The caller guarantees that the two chars are valid spelling chars.
|
||||
inline static bool is_same_spl_char(char ch1, char ch2) {
|
||||
return ch1 == ch2 || ch1 - ch2 == 'a' - 'A' || ch2 - ch1 == 'a' - 'A';
|
||||
}
|
||||
|
||||
// Construct the tree from the input pinyin array
|
||||
// The given string list should have been sorted.
|
||||
// score_amplifier is used to convert a possibility value into score.
|
||||
// average_score is the average_score of all spellings. The dumb node is
|
||||
// assigned with this score.
|
||||
bool construct(const char* spelling_arr, size_t item_size, size_t item_num,
|
||||
float score_amplifier, unsigned char average_score);
|
||||
|
||||
// Test if the given id is a valid spelling id.
|
||||
// If function returns true, the given splid may be updated like this:
|
||||
// When 'A' is not enabled in ShouZiMu mode, the parsing result for 'A' is
|
||||
// first given as a half id 1, but because 'A' is a one-char Yunmu and
|
||||
// it is a valid id, it needs to updated to its corresponding full id.
|
||||
bool if_valid_id_update(uint16 *splid) const;
|
||||
|
||||
// Test if the given id is a half id.
|
||||
bool is_half_id(uint16 splid) const;
|
||||
|
||||
bool is_full_id(uint16 splid) const;
|
||||
|
||||
// Test if the given id is a one-char Yunmu id (obviously, it is also a half
|
||||
// id), such as 'A', 'E' and 'O'.
|
||||
bool is_half_id_yunmu(uint16 splid) const;
|
||||
|
||||
// Test if this char is a ShouZiMu char. This ShouZiMu char may be not enabled.
|
||||
// For Pinyin, only i/u/v is not a ShouZiMu char.
|
||||
// The caller should guarantee that ch >= 'A' && ch <= 'Z'
|
||||
bool is_szm_char(char ch) const;
|
||||
|
||||
// Test If this char is enabled in ShouZiMu mode.
|
||||
// The caller should guarantee that ch >= 'A' && ch <= 'Z'
|
||||
bool szm_is_enabled(char ch) const;
|
||||
|
||||
// Enable/disable Shengmus in ShouZiMu mode(using the first char of a spelling
|
||||
// to input).
|
||||
void szm_enable_shm(bool enable);
|
||||
|
||||
// Enable/disable Yunmus in ShouZiMu mode.
|
||||
void szm_enable_ym(bool enable);
|
||||
|
||||
// Test if this char is enabled in ShouZiMu mode.
|
||||
// The caller should guarantee ch >= 'A' && ch <= 'Z'
|
||||
bool is_szm_enabled(char ch) const;
|
||||
|
||||
// Return the number of full ids for the given half id.
|
||||
uint16 half2full_num(uint16 half_id) const;
|
||||
|
||||
// Return the number of full ids for the given half id, and fill spl_id_start
|
||||
// to return the first full id.
|
||||
uint16 half_to_full(uint16 half_id, uint16 *spl_id_start) const;
|
||||
|
||||
// Return the corresponding half id for the given full id.
|
||||
// Not frequently used, low efficient.
|
||||
// Return 0 if fails.
|
||||
uint16 full_to_half(uint16 full_id) const;
|
||||
|
||||
// To test whether a half id is compatible with a full id.
|
||||
// Generally, when half_id == full_to_half(full_id), return true.
|
||||
// But for "Zh, Ch, Sh", if fussy mode is on, half id for 'Z' is compatible
|
||||
// with a full id like "Zhe". (Fussy mode is not ready).
|
||||
bool half_full_compatible(uint16 half_id, uint16 full_id) const;
|
||||
|
||||
static const SpellingTrie* get_cpinstance();
|
||||
|
||||
static SpellingTrie& get_instance();
|
||||
|
||||
// Save to the file stream
|
||||
bool save_spl_trie(FILE *fp);
|
||||
|
||||
// Load from the file stream
|
||||
bool load_spl_trie(FILE *fp);
|
||||
|
||||
// Get the number of spellings
|
||||
size_t get_spelling_num();
|
||||
|
||||
// Return the Yunmu id for the given Yunmu string.
|
||||
// If the string is not valid, return 0;
|
||||
uint8 get_ym_id(const char* ym_str);
|
||||
|
||||
// Get the readonly Pinyin string for a given spelling id
|
||||
const char* get_spelling_str(uint16 splid);
|
||||
|
||||
// Get the readonly Pinyin string for a given spelling id
|
||||
const char16* get_spelling_str16(uint16 splid);
|
||||
|
||||
// Get Pinyin string for a given spelling id. Return the length of the
|
||||
// string, and fill-in '\0' at the end.
|
||||
size_t get_spelling_str16(uint16 splid, char16 *splstr16,
|
||||
size_t splstr16_len);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_SPELLINGTRIE_H__
|
|
@ -0,0 +1,341 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <assert.h>
|
||||
#include "splparser.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Binds the parser to the read-only spelling trie instance returned by
// SpellingTrie::get_cpinstance().
SpellingParser::SpellingParser()
    : spl_trie_(SpellingTrie::get_cpinstance()) {
}
|
||||
|
||||
bool SpellingParser::is_valid_to_parse(char ch) {
|
||||
return SpellingTrie::is_valid_spl_char(ch);
|
||||
}
|
||||
|
||||
// Parses splstr (str_len chars) into a sequence of spelling ids.
//  - spl_idx receives the ids; at most max_size of them are produced.
//  - start_pos may be NULL; otherwise start_pos[i] receives the string
//    position where the i-th spelling starts and one extra entry is written
//    after the last id, so it needs room for max_size + 1 entries.
//  - last_is_pre is set to false on entry and on every early return; when the
//    whole string has been consumed it is true unless the last char was a
//    splitter.
// Returns the number of ids written; 0 when splstr is NULL, str_len or
// max_size is 0, or the first char is not a valid spelling char.
uint16 SpellingParser::splstr_to_idxs(const char *splstr, uint16 str_len,
                                      uint16 spl_idx[], uint16 start_pos[],
                                      uint16 max_size, bool &last_is_pre) {
  if (NULL == splstr || 0 == max_size || 0 == str_len)
    return 0;

  if (!SpellingTrie::is_valid_spl_char(splstr[0]))
    return 0;

  last_is_pre = false;

  const SpellingNode *cur_node = spl_trie_->root_;
  uint16 pos = 0;
  uint16 id_count = 0;
  if (NULL != start_pos)
    start_pos[0] = 0;
  bool after_splitter = false;

  while (pos < str_len) {
    const char ch = splstr[pos];

    // Every char that is not a letter acts as a splitter.
    if (!SpellingTrie::is_valid_spl_char(ch)) {
      uint16 node_id = cur_node->spelling_idx;
      if (!spl_trie_->if_valid_id_update(&node_id)) {
        // The current node cannot end a spelling: only tolerated directly
        // after another splitter, in which case this splitter is skipped.
        if (!after_splitter)
          return id_count;
        pos++;
        if (NULL != start_pos)
          start_pos[id_count] = pos;
        continue;
      }

      // The splitter closes the spelling collected so far.
      spl_idx[id_count] = node_id;
      id_count++;
      pos++;
      if (NULL != start_pos)
        start_pos[id_count] = pos;
      if (id_count >= max_size)
        return id_count;

      cur_node = spl_trie_->root_;
      after_splitter = true;
      continue;
    }

    after_splitter = false;

    const SpellingNode *next_node = NULL;
    if (0 == pos) {
      // The very first char is resolved through the first-level table.
      const char base = (ch >= 'a') ? 'a' : 'A';
      next_node = spl_trie_->level1_sons_[ch - base];
    } else {
      // Linear scan over the sons of the current node.
      const SpellingNode *sons = cur_node->first_son;
      for (int i = 0; i < cur_node->num_of_son; i++) {
        if (SpellingTrie::is_same_spl_char(sons[i].char_this_node, ch)) {
          next_node = sons + i;
          break;
        }
      }
    }

    if (NULL != next_node) {
      // Matched: descend and consume the char.
      cur_node = next_node;
      pos++;
      continue;
    }

    // No son matches: the current node must be able to end a spelling,
    // then the same char is retried from the root.
    uint16 node_id = cur_node->spelling_idx;
    if (!spl_trie_->if_valid_id_update(&node_id))
      return id_count;

    spl_idx[id_count] = node_id;
    id_count++;
    if (NULL != start_pos)
      start_pos[id_count] = pos;
    if (id_count >= max_size)
      return id_count;
    cur_node = spl_trie_->root_;
  }

  // End of input: record the pending node if it can end a spelling.
  uint16 tail_id = cur_node->spelling_idx;
  if (spl_trie_->if_valid_id_update(&tail_id)) {
    spl_idx[id_count] = tail_id;
    id_count++;
    if (NULL != start_pos)
      start_pos[id_count] = pos;
  }

  last_is_pre = !after_splitter;

  return id_count;
}
|
||||
|
||||
// Same as splstr_to_idxs(), except that every resulting id for which
// is_half_id_yunmu() holds is replaced by the full id reported by
// half_to_full(). When the last id is replaced, last_is_pre is cleared.
uint16 SpellingParser::splstr_to_idxs_f(const char *splstr, uint16 str_len,
                                        uint16 spl_idx[], uint16 start_pos[],
                                        uint16 max_size, bool &last_is_pre) {
  const uint16 id_count = splstr_to_idxs(splstr, str_len, spl_idx, start_pos,
                                         max_size, last_is_pre);

  for (uint16 i = 0; i < id_count; i++) {
    if (!spl_trie_->is_half_id_yunmu(spl_idx[i]))
      continue;

    spl_trie_->half_to_full(spl_idx[i], spl_idx + i);
    if (i + 1 == id_count)
      last_is_pre = false;
  }

  return id_count;
}
|
||||
|
||||
uint16 SpellingParser::splstr16_to_idxs(const char16 *splstr, uint16 str_len,
|
||||
uint16 spl_idx[], uint16 start_pos[],
|
||||
uint16 max_size, bool &last_is_pre) {
|
||||
if (NULL == splstr || 0 == max_size || 0 == str_len)
|
||||
return 0;
|
||||
|
||||
if (!SpellingTrie::is_valid_spl_char(splstr[0]))
|
||||
return 0;
|
||||
|
||||
last_is_pre = false;
|
||||
|
||||
const SpellingNode *node_this = spl_trie_->root_;
|
||||
|
||||
uint16 str_pos = 0;
|
||||
uint16 idx_num = 0;
|
||||
if (NULL != start_pos)
|
||||
start_pos[0] = 0;
|
||||
bool last_is_splitter = false;
|
||||
|
||||
while (str_pos < str_len) {
|
||||
char16 char_this = splstr[str_pos];
|
||||
// all characters outside of [a, z] are considered as splitters
|
||||
if (!SpellingTrie::is_valid_spl_char(char_this)) {
|
||||
// test if the current node is endable
|
||||
uint16 id_this = node_this->spelling_idx;
|
||||
if (spl_trie_->if_valid_id_update(&id_this)) {
|
||||
spl_idx[idx_num] = id_this;
|
||||
|
||||
idx_num++;
|
||||
str_pos++;
|
||||
if (NULL != start_pos)
|
||||
start_pos[idx_num] = str_pos;
|
||||
if (idx_num >= max_size)
|
||||
return idx_num;
|
||||
|
||||
node_this = spl_trie_->root_;
|
||||
last_is_splitter = true;
|
||||
continue;
|
||||
} else {
|
||||
if (last_is_splitter) {
|
||||
str_pos++;
|
||||
if (NULL != start_pos)
|
||||
start_pos[idx_num] = str_pos;
|
||||
continue;
|
||||
} else {
|
||||
return idx_num;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
last_is_splitter = false;
|
||||
|
||||
SpellingNode *found_son = NULL;
|
||||
|
||||
if (0 == str_pos) {
|
||||
if (char_this >= 'a')
|
||||
found_son = spl_trie_->level1_sons_[char_this - 'a'];
|
||||
else
|
||||
found_son = spl_trie_->level1_sons_[char_this - 'A'];
|
||||
} else {
|
||||
SpellingNode *first_son = node_this->first_son;
|
||||
// Because for Zh/Ch/Sh nodes, they are the last in the buffer and
|
||||
// frequently used, so we scan from the end.
|
||||
for (int i = 0; i < node_this->num_of_son; i++) {
|
||||
SpellingNode *this_son = first_son + i;
|
||||
if (SpellingTrie::is_same_spl_char(
|
||||
this_son->char_this_node, char_this)) {
|
||||
found_son = this_son;
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// found, just move the current node pointer to the the son
|
||||
if (NULL != found_son) {
|
||||
node_this = found_son;
|
||||
} else {
|
||||
// not found, test if it is endable
|
||||
uint16 id_this = node_this->spelling_idx;
|
||||
if (spl_trie_->if_valid_id_update(&id_this)) {
|
||||
// endable, remember the index
|
||||
spl_idx[idx_num] = id_this;
|
||||
|
||||
idx_num++;
|
||||
if (NULL != start_pos)
|
||||
start_pos[idx_num] = str_pos;
|
||||
if (idx_num >= max_size)
|
||||
return idx_num;
|
||||
node_this = spl_trie_->root_;
|
||||
continue;
|
||||
} else {
|
||||
return idx_num;
|
||||
}
|
||||
}
|
||||
|
||||
str_pos++;
|
||||
}
|
||||
|
||||
uint16 id_this = node_this->spelling_idx;
|
||||
if (spl_trie_->if_valid_id_update(&id_this)) {
|
||||
// endable, remember the index
|
||||
spl_idx[idx_num] = id_this;
|
||||
|
||||
idx_num++;
|
||||
if (NULL != start_pos)
|
||||
start_pos[idx_num] = str_pos;
|
||||
}
|
||||
|
||||
last_is_pre = !last_is_splitter;
|
||||
|
||||
return idx_num;
|
||||
}
|
||||
|
||||
// Same as splstr16_to_idxs(), except that every resulting id for which
// is_half_id_yunmu() holds is replaced by the full id reported by
// half_to_full(). When the last id is replaced, last_is_pre is cleared.
uint16 SpellingParser::splstr16_to_idxs_f(const char16 *splstr, uint16 str_len,
                                          uint16 spl_idx[], uint16 start_pos[],
                                          uint16 max_size, bool &last_is_pre) {
  const uint16 id_count = splstr16_to_idxs(splstr, str_len, spl_idx, start_pos,
                                           max_size, last_is_pre);

  for (uint16 i = 0; i < id_count; i++) {
    if (!spl_trie_->is_half_id_yunmu(spl_idx[i]))
      continue;

    spl_trie_->half_to_full(spl_idx[i], spl_idx + i);
    if (i + 1 == id_count)
      last_is_pre = false;
  }

  return id_count;
}
|
||||
|
||||
// Returns the spelling id of splstr when the whole string parses as exactly
// one spelling, otherwise 0. Also returns 0 when is_pre is NULL.
// *is_pre receives the last_is_pre result of splstr_to_idxs().
uint16 SpellingParser::get_splid_by_str(const char *splstr, uint16 str_len,
                                        bool *is_pre) {
  if (NULL == is_pre)
    return 0;

  uint16 ids[2];
  uint16 bounds[3];

  // Ask for up to two ids so that a string holding more than one spelling
  // can be told apart from a single one.
  const uint16 parsed = splstr_to_idxs(splstr, str_len, ids, bounds, 2,
                                       *is_pre);

  // Exactly one spelling is required, and it must cover the whole string.
  if (1 != parsed || bounds[1] != str_len)
    return 0;

  return ids[0];
}
|
||||
|
||||
// Like get_splid_by_str(), but when the result satisfies is_half_id_yunmu()
// it is replaced by the full id from half_to_full() and *is_pre is cleared.
// Returns 0 when is_pre is NULL or the string is not exactly one spelling.
uint16 SpellingParser::get_splid_by_str_f(const char *splstr, uint16 str_len,
                                          bool *is_pre) {
  if (NULL == is_pre)
    return 0;

  uint16 ids[2];
  uint16 bounds[3];

  const uint16 parsed = splstr_to_idxs(splstr, str_len, ids, bounds, 2,
                                       *is_pre);

  // Exactly one spelling is required, and it must cover the whole string.
  if (1 != parsed || bounds[1] != str_len)
    return 0;

  if (spl_trie_->is_half_id_yunmu(ids[0])) {
    spl_trie_->half_to_full(ids[0], ids);
    *is_pre = false;
  }

  return ids[0];
}
|
||||
|
||||
// Stores the single spelling id of splstr in splidx[0].
// Returns 1 when an id was found and 0 otherwise; 0 is also returned when
// max_size is 0 or the first char is not valid to parse.
// full_id_num is set to 1 when the id found is a full id, else to 0.
// is_pre is filled in by get_splid_by_str().
uint16 SpellingParser::get_splids_parallel(const char *splstr, uint16 str_len,
                                           uint16 splidx[], uint16 max_size,
                                           uint16 &full_id_num, bool &is_pre) {
  if (0 == max_size || !is_valid_to_parse(splstr[0]))
    return 0;

  splidx[0] = get_splid_by_str(splstr, str_len, &is_pre);
  full_id_num = 0;

  if (0 == splidx[0])
    return 0;

  if (splidx[0] >= kFullSplIdStart)
    full_id_num = 1;
  return 1;
}
|
||||
|
||||
} // namespace ime_pinyin
|
|
@ -0,0 +1,96 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_SPLPARSER_H__
|
||||
#define PINYINIME_INCLUDE_SPLPARSER_H__
|
||||
|
||||
#include "./dictdef.h"
|
||||
#include "./spellingtrie.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
class SpellingParser {
|
||||
protected:
|
||||
const SpellingTrie *spl_trie_;
|
||||
|
||||
public:
|
||||
SpellingParser();
|
||||
|
||||
// Given a string, parse it into a spelling id stream.
|
||||
// If the whole string are sucessfully parsed, last_is_pre will be true;
|
||||
// if the whole string is not fullly parsed, last_is_pre will return whether
|
||||
// the last part of the string is a prefix of a full spelling string. For
|
||||
// example, given string "zhengzhon", "zhon" is not a valid speling, but it is
|
||||
// the prefix of "zhong".
|
||||
//
|
||||
// If splstr starts with a character not in ['a'-z'] (it is a split char),
|
||||
// return 0.
|
||||
// Split char can only appear in the middle of the string or at the end.
|
||||
uint16 splstr_to_idxs(const char *splstr, uint16 str_len, uint16 splidx[],
|
||||
uint16 start_pos[], uint16 max_size, bool &last_is_pre);
|
||||
|
||||
// Similar to splstr_to_idxs(), the only difference is that splstr_to_idxs()
|
||||
// convert single-character Yunmus into half ids, while this function converts
|
||||
// them into full ids.
|
||||
uint16 splstr_to_idxs_f(const char *splstr, uint16 str_len, uint16 splidx[],
|
||||
uint16 start_pos[], uint16 max_size, bool &last_is_pre);
|
||||
|
||||
// Similar to splstr_to_idxs(), the only difference is that this function
|
||||
// uses char16 instead of char8.
|
||||
uint16 splstr16_to_idxs(const char16 *splstr, uint16 str_len, uint16 splidx[],
|
||||
uint16 start_pos[], uint16 max_size, bool &last_is_pre);
|
||||
|
||||
// Similar to splstr_to_idxs_f(), the only difference is that this function
|
||||
// uses char16 instead of char8.
|
||||
uint16 splstr16_to_idxs_f(const char16 *splstr16, uint16 str_len,
|
||||
uint16 splidx[], uint16 start_pos[],
|
||||
uint16 max_size, bool &last_is_pre);
|
||||
|
||||
// If the given string is a spelling, return the id, others, return 0.
|
||||
// If the give string is a single char Yunmus like "A", and the char is
|
||||
// enabled in ShouZiMu mode, the returned spelling id will be a half id.
|
||||
// When the returned spelling id is a half id, *is_pre returns whether it
|
||||
// is a prefix of a full spelling string.
|
||||
uint16 get_splid_by_str(const char *splstr, uint16 str_len, bool *is_pre);
|
||||
|
||||
// If the given string is a spelling, return the id, others, return 0.
|
||||
// If the give string is a single char Yunmus like "a", no matter the char
|
||||
// is enabled in ShouZiMu mode or not, the returned spelling id will be
|
||||
// a full id.
|
||||
// When the returned spelling id is a half id, *p_is_pre returns whether it
|
||||
// is a prefix of a full spelling string.
|
||||
uint16 get_splid_by_str_f(const char *splstr, uint16 str_len, bool *is_pre);
|
||||
|
||||
// Splitter chars are not included.
|
||||
bool is_valid_to_parse(char ch);
|
||||
|
||||
// When auto-correction is not enabled, get_splid_by_str() will be called to
|
||||
// return the single result. When auto-correction is enabled, this function
|
||||
// will be called to get the results. Auto-correction is not ready.
|
||||
// full_id_num returns number of full spelling ids.
|
||||
// is_pre returns whether the given string is the prefix of a full spelling
|
||||
// string.
|
||||
// If splstr starts with a character not in [a-zA-Z] (it is a split char),
|
||||
// return 0.
|
||||
// Split char can only appear in the middle of the string or at the end.
|
||||
// The caller should guarantee NULL != splstr && str_len > 0 && NULL != splidx
|
||||
uint16 get_splids_parallel(const char *splstr, uint16 str_len,
|
||||
uint16 splidx[], uint16 max_size,
|
||||
uint16 &full_id_num, bool &is_pre);
|
||||
};
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_SPLPARSER_H__
|
|
@ -0,0 +1,112 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "sync.h"
|
||||
#include <assert.h>
|
||||
#include <string.h>
|
||||
|
||||
#ifdef ___SYNC_ENABLED___
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Creates a Sync object with no dictionary attached: no user dictionary,
// no dictionary file name, and a last-got count of 0.
Sync::Sync()
    : userdict_(NULL),
      dictfile_(NULL),
      last_count_(0) {
}
|
||||
|
||||
// Releases the user dictionary and the duplicated file name if the caller
// did not end the session with finish(). finish() does nothing when no
// dictionary is open, so an explicit finish() beforehand stays valid.
Sync::~Sync() {
  finish();
}
|
||||
|
||||
|
||||
bool Sync::begin(const char * filename) {
|
||||
if (userdict_) {
|
||||
finish();
|
||||
}
|
||||
|
||||
if (!filename) {
|
||||
return false;
|
||||
}
|
||||
|
||||
dictfile_ = strdup(filename);
|
||||
if (!dictfile_) {
|
||||
return false;
|
||||
}
|
||||
|
||||
userdict_ = new UserDict();
|
||||
if (!userdict_) {
|
||||
free(dictfile_);
|
||||
dictfile_ = NULL;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (userdict_->load_dict((const char*)dictfile_, kUserDictIdStart,
|
||||
kUserDictIdEnd) == false) {
|
||||
delete userdict_;
|
||||
userdict_ = NULL;
|
||||
free(dictfile_);
|
||||
dictfile_ = NULL;
|
||||
return false;
|
||||
}
|
||||
|
||||
userdict_->set_limit(kUserDictMaxLemmaCount, kUserDictMaxLemmaSize, kUserDictRatio);
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Forwards the UTF16LE lemma string (len units) to the user dictionary and
// returns the dictionary's result.
// Returns 0 when no dictionary is open, i.e. begin() was not called or
// failed, instead of dereferencing a NULL dictionary.
int Sync::put_lemmas(char16 * lemmas, int len) {
  if (NULL == userdict_) {
    return 0;
  }
  return userdict_->put_lemmas_no_sync_from_utf16le_string(lemmas, len);
}
|
||||
|
||||
// Asks the user dictionary to write its sync lemmas into str (capacity size)
// as a UTF16LE string; the dictionary also updates last_count_.
// Returns the dictionary's result, or 0 when no dictionary is open, i.e.
// begin() was not called or failed.
int Sync::get_lemmas(char16 * str, int size) {
  if (NULL == userdict_) {
    return 0;
  }
  return userdict_->get_sync_lemmas_in_utf16le_string_from_beginning(
      str, size, &last_count_);
}
|
||||
|
||||
int Sync::get_last_got_count() {
|
||||
return last_count_;
|
||||
}
|
||||
|
||||
int Sync::get_total_count() {
|
||||
return userdict_->get_sync_count();
|
||||
}
|
||||
|
||||
// Clears the sync lemmas in the range (0, last_count_) from the user
// dictionary and resets last_count_ to 0.
// Does nothing when last_count_ is negative or when no dictionary is open,
// i.e. begin() was not called or failed.
void Sync::clear_last_got() {
  if (NULL == userdict_ || last_count_ < 0) {
    return;
  }
  userdict_->clear_sync_lemmas(0, last_count_);
  last_count_ = 0;
}
|
||||
|
||||
void Sync::finish() {
|
||||
if (userdict_) {
|
||||
userdict_->close_dict();
|
||||
delete userdict_;
|
||||
userdict_ = NULL;
|
||||
free(dictfile_);
|
||||
dictfile_ = NULL;
|
||||
last_count_ = 0;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the difference between the lemma-count limit and the current lemma
// count, both taken from the user dictionary's statistics.
// Returns 0 when no dictionary is open, i.e. begin() was not called or
// failed, instead of dereferencing a NULL dictionary.
int Sync::get_capacity() {
  if (NULL == userdict_) {
    return 0;
  }
  UserDict::UserDictStat stat;
  userdict_->state(&stat);
  return stat.limit_lemma_count - stat.lemma_count;
}
|
||||
|
||||
}
|
||||
#endif
|
|
@ -0,0 +1,85 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_SYNC_H__
|
||||
#define PINYINIME_INCLUDE_SYNC_H__
|
||||
|
||||
#define ___SYNC_ENABLED___
|
||||
|
||||
#ifdef ___SYNC_ENABLED___
|
||||
|
||||
#include "userdict.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Class for user dictionary synchronization
|
||||
// This class is not thread safe
|
||||
// Normal invoking flow will be
|
||||
// begin() ->
|
||||
// put_lemmas() x N ->
|
||||
// {
|
||||
// get_lemmas() ->
|
||||
// [ get_last_got_count() ] ->
|
||||
// clear_last_got() ->
|
||||
// } x N ->
|
||||
// finish()
|
||||
class Sync {
|
||||
public:
|
||||
Sync();
|
||||
~Sync();
|
||||
|
||||
static const int kUserDictMaxLemmaCount = 5000;
|
||||
static const int kUserDictMaxLemmaSize = 200000;
|
||||
static const int kUserDictRatio = 20;
|
||||
|
||||
bool begin(const char * filename);
|
||||
|
||||
// Merge lemmas downloaded from sync server into local dictionary
|
||||
// lemmas, lemmas string encoded in UTF16LE
|
||||
// len, length of lemmas string
|
||||
// Return how many lemmas merged successfully
|
||||
int put_lemmas(char16 * lemmas, int len);
|
||||
|
||||
// Get local new user lemmas into UTF16LE string
|
||||
// str, buffer ptr to store new user lemmas
|
||||
// size, size of buffer
|
||||
// Return length of returned buffer in measure of UTF16LE
|
||||
int get_lemmas(char16 * str, int size);
|
||||
|
||||
// Return lemmas count in last get_lemmas()
|
||||
int get_last_got_count();
|
||||
|
||||
// Return total lemmas count need get_lemmas()
|
||||
int get_total_count();
|
||||
|
||||
// Clear lemmas got by recent get_lemmas()
|
||||
void clear_last_got();
|
||||
|
||||
void finish();
|
||||
|
||||
int get_capacity();
|
||||
|
||||
private:
|
||||
UserDict * userdict_;
|
||||
char * dictfile_;
|
||||
int last_count_;
|
||||
};
|
||||
|
||||
}
|
||||
|
||||
#endif
|
||||
|
||||
#endif // PINYINIME_INCLUDE_SYNC_H__
|
File diff suppressed because it is too large
Load Diff
|
@ -0,0 +1,432 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_USERDICT_H__
|
||||
#define PINYINIME_INCLUDE_USERDICT_H__
|
||||
|
||||
#define ___CACHE_ENABLED___
|
||||
#define ___SYNC_ENABLED___
|
||||
#define ___PREDICT_ENABLED___
|
||||
|
||||
// Debug performance for operations
|
||||
// #define ___DEBUG_PERF___
|
||||
|
||||
#ifdef _WIN32
|
||||
#include <winsock.h> // timeval
|
||||
#else
|
||||
#include <pthread.h>
|
||||
#endif
|
||||
#include "atomdictbase.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
class UserDict : public AtomDictBase {
|
||||
public:
|
||||
UserDict();
|
||||
~UserDict();
|
||||
|
||||
bool load_dict(const char *file_name, LemmaIdType start_id,
|
||||
LemmaIdType end_id);
|
||||
|
||||
bool close_dict();
|
||||
|
||||
size_t number_of_lemmas();
|
||||
|
||||
void reset_milestones(uint16 from_step, MileStoneHandle from_handle);
|
||||
|
||||
MileStoneHandle extend_dict(MileStoneHandle from_handle,
|
||||
const DictExtPara *dep, LmaPsbItem *lpi_items,
|
||||
size_t lpi_max, size_t *lpi_num);
|
||||
|
||||
size_t get_lpis(const uint16 *splid_str, uint16 splid_str_len,
|
||||
LmaPsbItem *lpi_items, size_t lpi_max);
|
||||
|
||||
uint16 get_lemma_str(LemmaIdType id_lemma, char16* str_buf,
|
||||
uint16 str_max);
|
||||
|
||||
uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
|
||||
uint16 splids_max, bool arg_valid);
|
||||
|
||||
size_t predict(const char16 last_hzs[], uint16 hzs_len,
|
||||
NPredictItem *npre_items, size_t npre_max,
|
||||
size_t b4_used);
|
||||
|
||||
// Full spelling ids are required
|
||||
LemmaIdType put_lemma(char16 lemma_str[], uint16 splids[],
|
||||
uint16 lemma_len, uint16 count);
|
||||
|
||||
LemmaIdType update_lemma(LemmaIdType lemma_id, int16 delta_count,
|
||||
bool selected);
|
||||
|
||||
LemmaIdType get_lemma_id(char16 lemma_str[], uint16 splids[],
|
||||
uint16 lemma_len);
|
||||
|
||||
LmaScoreType get_lemma_score(LemmaIdType lemma_id);
|
||||
|
||||
LmaScoreType get_lemma_score(char16 lemma_str[], uint16 splids[],
|
||||
uint16 lemma_len);
|
||||
|
||||
bool remove_lemma(LemmaIdType lemma_id);
|
||||
|
||||
size_t get_total_lemma_count();
|
||||
void set_total_lemma_count_of_others(size_t count);
|
||||
|
||||
void flush_cache();
|
||||
|
||||
void set_limit(uint32 max_lemma_count, uint32 max_lemma_size,
|
||||
uint32 reclaim_ratio);
|
||||
|
||||
void reclaim();
|
||||
|
||||
void defragment();
|
||||
|
||||
#ifdef ___SYNC_ENABLED___
|
||||
void clear_sync_lemmas(unsigned int start, unsigned int end);
|
||||
|
||||
int get_sync_count();
|
||||
|
||||
LemmaIdType put_lemma_no_sync(char16 lemma_str[], uint16 splids[],
|
||||
uint16 lemma_len, uint16 count, uint64 lmt);
|
||||
/**
|
||||
* Add lemmas encoded in UTF-16LE into dictionary without adding sync flag.
|
||||
*
|
||||
* @param lemmas in format of 'wo men,WM,0.32;da jia,DJ,0.12'
|
||||
* @param len length of lemmas string in UTF-16LE
|
||||
* @return newly added lemma count
|
||||
*/
|
||||
int put_lemmas_no_sync_from_utf16le_string(char16 * lemmas, int len);
|
||||
|
||||
/**
|
||||
* Get lemmas need sync to a UTF-16LE string of above format.
|
||||
* Note: input buffer (str) must not be too small. If str is too small to
|
||||
* contain single one lemma, there might be a dead loop.
|
||||
*
|
||||
* @param str buffer to write lemmas
|
||||
* @param size buffer size in UTF-16LE
|
||||
* @param count output value of lemma returned
|
||||
* @return UTF-16LE string length
|
||||
*/
|
||||
int get_sync_lemmas_in_utf16le_string_from_beginning(
|
||||
char16 * str, int size, int * count);
|
||||
|
||||
#endif
|
||||
|
||||
struct UserDictStat {
|
||||
uint32 version;
|
||||
const char * file_name;
|
||||
struct timeval load_time;
|
||||
struct timeval last_update;
|
||||
uint32 disk_size;
|
||||
uint32 lemma_count;
|
||||
uint32 lemma_size;
|
||||
uint32 delete_count;
|
||||
uint32 delete_size;
|
||||
#ifdef ___SYNC_ENABLED___
|
||||
uint32 sync_count;
|
||||
#endif
|
||||
uint32 reclaim_ratio;
|
||||
uint32 limit_lemma_count;
|
||||
uint32 limit_lemma_size;
|
||||
};
|
||||
|
||||
bool state(UserDictStat * stat);
|
||||
|
||||
private:
|
||||
uint32 total_other_nfreq_;
|
||||
struct timeval load_time_;
|
||||
LemmaIdType start_id_;
|
||||
uint32 version_;
|
||||
uint8 * lemmas_;
|
||||
|
||||
// In-Memory-Only flag for each lemma
|
||||
static const uint8 kUserDictLemmaFlagRemove = 1;
|
||||
// Inuse lemmas' offset
|
||||
uint32 * offsets_;
|
||||
// Highest bit in offset tells whether corresponding lemma is removed
|
||||
static const uint32 kUserDictOffsetFlagRemove = (1 << 31);
|
||||
// Maximum possible for the offset
|
||||
static const uint32 kUserDictOffsetMask = ~(kUserDictOffsetFlagRemove);
|
||||
// Bit width for last modified time, from 1 to 16
|
||||
static const uint32 kUserDictLMTBitWidth = 16;
|
||||
// Granularity for last modified time in second
|
||||
static const uint32 kUserDictLMTGranularity = 60 * 60 * 24 * 7;
|
||||
// Maximum frequency count
|
||||
static const uint16 kUserDictMaxFrequency = 0xFFFF;
|
||||
|
||||
#define COARSE_UTC(year, month, day, hour, minute, second) \
|
||||
( \
|
||||
(year - 1970) * 365 * 24 * 60 * 60 + \
|
||||
(month - 1) * 30 * 24 * 60 * 60 + \
|
||||
(day - 1) * 24 * 60 * 60 + \
|
||||
(hour - 0) * 60 * 60 + \
|
||||
(minute - 0) * 60 + \
|
||||
(second - 0) \
|
||||
)
|
||||
static const uint64 kUserDictLMTSince = COARSE_UTC(2009, 1, 1, 0, 0, 0);
|
||||
|
||||
// Correspond to offsets_
|
||||
uint32 * scores_;
|
||||
// Following two fields are only valid in memory
|
||||
uint32 * ids_;
|
||||
#ifdef ___PREDICT_ENABLED___
|
||||
uint32 * predicts_;
|
||||
#endif
|
||||
#ifdef ___SYNC_ENABLED___
|
||||
uint32 * syncs_;
|
||||
size_t sync_count_size_;
|
||||
#endif
|
||||
uint32 * offsets_by_id_;
|
||||
|
||||
size_t lemma_count_left_;
|
||||
size_t lemma_size_left_;
|
||||
|
||||
const char * dict_file_;
|
||||
|
||||
// Be sure size is 4xN
|
||||
struct UserDictInfo {
|
||||
// When limitation reached, how much percentage will be reclaimed (1 ~ 100)
|
||||
uint32 reclaim_ratio;
|
||||
// maximum lemma count, 0 means no limitation
|
||||
uint32 limit_lemma_count;
|
||||
// Maximum lemma size, it's different from
|
||||
// whole disk file size or in-mem dict size
|
||||
// 0 means no limitation
|
||||
uint32 limit_lemma_size;
|
||||
// Total lemma count including deleted and inuse
|
||||
// Also indicate offsets_ size
|
||||
uint32 lemma_count;
|
||||
// Total size of lemmas including used and freed
|
||||
uint32 lemma_size;
|
||||
// Freed lemma count
|
||||
uint32 free_count;
|
||||
// Freed lemma size in byte
|
||||
uint32 free_size;
|
||||
#ifdef ___SYNC_ENABLED___
|
||||
uint32 sync_count;
|
||||
#endif
|
||||
int32 total_nfreq;
|
||||
} dict_info_;
|
||||
|
||||
static const uint32 kUserDictVersion = 0x0ABCDEF0;
|
||||
|
||||
static const uint32 kUserDictPreAlloc = 32;
|
||||
static const uint32 kUserDictAverageNchar = 8;
|
||||
|
||||
enum UserDictState {
|
||||
// Keep in order
|
||||
USER_DICT_NONE = 0,
|
||||
USER_DICT_SYNC,
|
||||
#ifdef ___SYNC_ENABLED___
|
||||
USER_DICT_SYNC_DIRTY,
|
||||
#endif
|
||||
USER_DICT_SCORE_DIRTY,
|
||||
USER_DICT_OFFSET_DIRTY,
|
||||
USER_DICT_LEMMA_DIRTY,
|
||||
|
||||
USER_DICT_DEFRAGMENTED,
|
||||
} state_;
|
||||
|
||||
struct UserDictSearchable {
|
||||
uint16 splids_len;
|
||||
uint16 splid_start[kMaxLemmaSize];
|
||||
uint16 splid_count[kMaxLemmaSize];
|
||||
// Compact inital letters for both FuzzyCompareSpellId and cache system
|
||||
uint32 signature[kMaxLemmaSize / 4];
|
||||
};
|
||||
|
||||
#ifdef ___CACHE_ENABLED___
|
||||
enum UserDictCacheType {
|
||||
USER_DICT_CACHE,
|
||||
USER_DICT_MISS_CACHE,
|
||||
};
|
||||
|
||||
static const int kUserDictCacheSize = 4;
|
||||
static const int kUserDictMissCacheSize = kMaxLemmaSize - 1;
|
||||
|
||||
struct UserDictMissCache {
|
||||
uint32 signatures[kUserDictMissCacheSize][kMaxLemmaSize / 4];
|
||||
uint16 head, tail;
|
||||
} miss_caches_[kMaxLemmaSize];
|
||||
|
||||
struct UserDictCache {
|
||||
uint32 signatures[kUserDictCacheSize][kMaxLemmaSize / 4];
|
||||
uint32 offsets[kUserDictCacheSize];
|
||||
uint32 lengths[kUserDictCacheSize];
|
||||
// Ring buffer
|
||||
uint16 head, tail;
|
||||
} caches_[kMaxLemmaSize];
|
||||
|
||||
void cache_init();
|
||||
|
||||
void cache_push(UserDictCacheType type,
|
||||
UserDictSearchable *searchable,
|
||||
uint32 offset, uint32 length);
|
||||
|
||||
bool cache_hit(UserDictSearchable *searchable,
|
||||
uint32 *offset, uint32 *length);
|
||||
|
||||
bool load_cache(UserDictSearchable *searchable,
|
||||
uint32 *offset, uint32 *length);
|
||||
|
||||
void save_cache(UserDictSearchable *searchable,
|
||||
uint32 offset, uint32 length);
|
||||
|
||||
void reset_cache();
|
||||
|
||||
bool load_miss_cache(UserDictSearchable *searchable);
|
||||
|
||||
void save_miss_cache(UserDictSearchable *searchable);
|
||||
|
||||
void reset_miss_cache();
|
||||
#endif
|
||||
|
||||
LmaScoreType translate_score(int f);
|
||||
|
||||
int extract_score_freq(int raw_score);
|
||||
|
||||
uint64 extract_score_lmt(int raw_score);
|
||||
|
||||
inline int build_score(uint64 lmt, int freq);
|
||||
|
||||
inline int64 utf16le_atoll(uint16 *s, int len);
|
||||
|
||||
inline int utf16le_lltoa(int64 v, uint16 *s, int size);
|
||||
|
||||
LemmaIdType _put_lemma(char16 lemma_str[], uint16 splids[],
|
||||
uint16 lemma_len, uint16 count, uint64 lmt);
|
||||
|
||||
size_t _get_lpis(const uint16 *splid_str, uint16 splid_str_len,
|
||||
LmaPsbItem *lpi_items, size_t lpi_max, bool * need_extend);
|
||||
|
||||
int _get_lemma_score(char16 lemma_str[], uint16 splids[], uint16 lemma_len);
|
||||
|
||||
int _get_lemma_score(LemmaIdType lemma_id);
|
||||
|
||||
int is_fuzzy_prefix_spell_id(const uint16 * id1, uint16 len1,
|
||||
const UserDictSearchable *searchable);
|
||||
|
||||
bool is_prefix_spell_id(const uint16 * fullids,
|
||||
uint16 fulllen, const UserDictSearchable *searchable);
|
||||
|
||||
uint32 get_dict_file_size(UserDictInfo * info);
|
||||
|
||||
bool reset(const char *file);
|
||||
|
||||
bool validate(const char *file);
|
||||
|
||||
bool load(const char *file, LemmaIdType start_id);
|
||||
|
||||
bool is_valid_state();
|
||||
|
||||
bool is_valid_lemma_id(LemmaIdType id);
|
||||
|
||||
LemmaIdType get_max_lemma_id();
|
||||
|
||||
void set_lemma_flag(uint32 offset, uint8 flag);
|
||||
|
||||
char get_lemma_flag(uint32 offset);
|
||||
|
||||
char get_lemma_nchar(uint32 offset);
|
||||
|
||||
uint16 * get_lemma_spell_ids(uint32 offset);
|
||||
|
||||
uint16 * get_lemma_word(uint32 offset);
|
||||
|
||||
// Prepare searchable to fasten locate process
|
||||
void prepare_locate(UserDictSearchable *searchable,
|
||||
const uint16 * splids, uint16 len);
|
||||
|
||||
// Compare initial letters only
|
||||
int32 fuzzy_compare_spell_id(const uint16 * id1, uint16 len1,
|
||||
const UserDictSearchable *searchable);
|
||||
|
||||
// Compare exactly two spell ids
|
||||
// First argument must be a full id spell id
|
||||
bool equal_spell_id(const uint16 * fullids,
|
||||
uint16 fulllen, const UserDictSearchable *searchable);
|
||||
|
||||
// Find first item by initial letters
|
||||
int32 locate_first_in_offsets(const UserDictSearchable *searchable);
|
||||
|
||||
LemmaIdType append_a_lemma(char16 lemma_str[], uint16 splids[],
|
||||
uint16 lemma_len, uint16 count, uint64 lmt);
|
||||
|
||||
// Check if a lemma is in dictionary
|
||||
int32 locate_in_offsets(char16 lemma_str[],
|
||||
uint16 splid_str[], uint16 lemma_len);
|
||||
|
||||
bool remove_lemma_by_offset_index(int offset_index);
|
||||
#ifdef ___PREDICT_ENABLED___
|
||||
uint32 locate_where_to_insert_in_predicts(const uint16 * words,
|
||||
int lemma_len);
|
||||
|
||||
int32 locate_first_in_predicts(const uint16 * words, int lemma_len);
|
||||
|
||||
void remove_lemma_from_predict_list(uint32 offset);
|
||||
#endif
|
||||
#ifdef ___SYNC_ENABLED___
|
||||
void queue_lemma_for_sync(LemmaIdType id);
|
||||
|
||||
void remove_lemma_from_sync_list(uint32 offset);
|
||||
|
||||
void write_back_sync(int fd);
|
||||
#endif
|
||||
void write_back_score(int fd);
|
||||
void write_back_offset(int fd);
|
||||
void write_back_lemma(int fd);
|
||||
void write_back_all(int fd);
|
||||
void write_back();
|
||||
|
||||
struct UserDictScoreOffsetPair {
|
||||
int score;
|
||||
uint32 offset_index;
|
||||
};
|
||||
|
||||
inline void swap(UserDictScoreOffsetPair * sop, int i, int j);
|
||||
|
||||
void shift_down(UserDictScoreOffsetPair * sop, int i, int n);
|
||||
|
||||
// On-disk format for each lemma
|
||||
// +-------------+
|
||||
// | Version (4) |
|
||||
// +-------------+
|
||||
// +-----------+-----------+--------------------+-------------------+
|
||||
// | Spare (1) | Nchar (1) | Splids (2 x Nchar) | Lemma (2 x Nchar) |
|
||||
// +-----------+-----------+--------------------+-------------------+
|
||||
// ...
|
||||
// +-----------------------+ +-------------+ <---Offset of offset
|
||||
// | Offset1 by_splids (4) | ... | OffsetN (4) |
|
||||
// +-----------------------+ +-------------+
|
||||
#ifdef ___PREDICT_ENABLED___
|
||||
// +----------------------+ +-------------+
|
||||
// | Offset1 by_lemma (4) | ... | OffsetN (4) |
|
||||
// +----------------------+ +-------------+
|
||||
#endif
|
||||
// +------------+ +------------+
|
||||
// | Score1 (4) | ... | ScoreN (4) |
|
||||
// +------------+ +------------+
|
||||
#ifdef ___SYNC_ENABLED___
|
||||
// +-------------+ +-------------+
|
||||
// | NewAdd1 (4) | ... | NewAddN (4) |
|
||||
// +-------------+ +-------------+
|
||||
#endif
|
||||
// +----------------+
|
||||
// | Dict Info (4x) |
|
||||
// +----------------+
|
||||
};
|
||||
}
|
||||
|
||||
#endif
|
|
@ -0,0 +1,182 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "utf16char.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
// Extracts the next blank-delimited token from utf16_str, in place.
//
// Blanks are space, newline and tab. Leading blanks are skipped, the blank
// that ends the token is overwritten with a terminator, and the token length
// is stored in *token_size.
// *utf16_str_next receives the position right after that blank, or NULL when
// the token ran up to the end of the string.
// Returns the token, or NULL if any argument is NULL or no token is left
// (in which case *token_size is not written).
char16* utf16_strtok(char16 *utf16_str, size_t *token_size,
                     char16 **utf16_str_next) {
  if (!utf16_str || !token_size || !utf16_str_next)
    return NULL;

  // Step over the separators in front of the token.
  char16 *token = utf16_str;
  for (;;) {
    const char16 ch = *token;
    if (ch != (char16)' ' && ch != (char16)'\n' && ch != (char16)'\t')
      break;
    ++token;
  }

  // Walk to the first separator or terminator after the token.
  char16 *stop = token;
  for (;;) {
    const char16 ch = *stop;
    if (ch == (char16)'\0' || ch == (char16)' ' || ch == (char16)'\n' ||
        ch == (char16)'\t')
      break;
    ++stop;
  }

  const size_t length = static_cast<size_t>(stop - token);

  if (*stop != (char16)'\0') {
    *utf16_str_next = stop + 1;
  } else {
    *utf16_str_next = NULL;
    if (length == 0)
      return NULL;
  }

  *stop = (char16)'\0';
  *token_size = length;
  return token;
}
|
||||
|
||||
int utf16_atoi(const char16 *utf16_str) {
|
||||
if (NULL == utf16_str)
|
||||
return 0;
|
||||
|
||||
int value = 0;
|
||||
int sign = 1;
|
||||
size_t pos = 0;
|
||||
|
||||
if ((char16)'-' == utf16_str[pos]) {
|
||||
sign = -1;
|
||||
pos++;
|
||||
}
|
||||
|
||||
while ((char16)'0' <= utf16_str[pos] &&
|
||||
(char16)'9' >= utf16_str[pos]) {
|
||||
value = value * 10 + static_cast<int>(utf16_str[pos] - (char16)'0');
|
||||
pos++;
|
||||
}
|
||||
|
||||
return value*sign;
|
||||
}
|
||||
|
||||
float utf16_atof(const char16 *utf16_str) {
|
||||
// A temporary implemetation.
|
||||
char char8[256];
|
||||
if (utf16_strlen(utf16_str) >= 256) return 0;
|
||||
|
||||
utf16_strcpy_tochar(char8, utf16_str);
|
||||
return atof(char8);
|
||||
}
|
||||
|
||||
size_t utf16_strlen(const char16 *utf16_str) {
|
||||
if (NULL == utf16_str)
|
||||
return 0;
|
||||
|
||||
size_t size = 0;
|
||||
while ((char16)'\0' != utf16_str[size])
|
||||
size++;
|
||||
return size;
|
||||
}
|
||||
|
||||
int utf16_strcmp(const char16* str1, const char16* str2) {
|
||||
size_t pos = 0;
|
||||
while (str1[pos] == str2[pos] && (char16)'\0' != str1[pos])
|
||||
pos++;
|
||||
|
||||
return static_cast<int>(str1[pos]) - static_cast<int>(str2[pos]);
|
||||
}
|
||||
|
||||
int utf16_strncmp(const char16 *str1, const char16 *str2, size_t size) {
|
||||
size_t pos = 0;
|
||||
while (pos < size && str1[pos] == str2[pos] && (char16)'\0' != str1[pos])
|
||||
pos++;
|
||||
|
||||
if (pos == size)
|
||||
return 0;
|
||||
|
||||
return static_cast<int>(str1[pos]) - static_cast<int>(str2[pos]);
|
||||
}
|
||||
|
||||
// we do not consider overlapping
|
||||
char16* utf16_strcpy(char16 *dst, const char16 *src) {
|
||||
if (NULL == src || NULL == dst)
|
||||
return NULL;
|
||||
|
||||
char16* cp = dst;
|
||||
|
||||
while ((char16)'\0' != *src) {
|
||||
*cp = *src;
|
||||
cp++;
|
||||
src++;
|
||||
}
|
||||
|
||||
*cp = *src;
|
||||
|
||||
return dst;
|
||||
}
|
||||
|
||||
// Copies at most `size` code units from src to dst, stopping right after the
// terminator when it is met earlier. The destination is not padded, and it is
// not terminated when src holds `size` units or more.
//
// Returns dst, or NULL when an argument is NULL or size is 0. When src and
// dst are the same pointer nothing is copied.
//
// Overlapping buffers are supported: the units are copied back to front when
// dst lies inside the source range. The previous backward loop compared
// (`==`) instead of assigning, so it copied nothing in that case.
char16* utf16_strncpy(char16 *dst, const char16 *src, size_t size) {
  if (NULL == src || NULL == dst || 0 == size)
    return NULL;

  if (src == dst)
    return dst;

  // Work out how many units have to move before anything is overwritten:
  // the characters up to `size`, plus the terminator if there is room for it.
  size_t count = 0;
  while (count < size && (char16)'\0' != src[count])
    count++;
  if (count < size)
    count++;

  if (dst < src || dst >= src + count) {
    // No unread source unit can be clobbered: copy front to back.
    for (size_t i = 0; i < count; i++)
      dst[i] = src[i];
  } else {
    // dst starts inside the source range: copy back to front.
    for (size_t i = count; i > 0; i--)
      dst[i - 1] = src[i - 1];
  }
  return dst;
}
|
||||
|
||||
// We do not handle complicated cases like overlapping, because in this
|
||||
// codebase, it is not necessary.
|
||||
char* utf16_strcpy_tochar(char *dst, const char16 *src) {
|
||||
if (NULL == src || NULL == dst)
|
||||
return NULL;
|
||||
|
||||
char* cp = dst;
|
||||
|
||||
while ((char16)'\0' != *src) {
|
||||
*cp = static_cast<char>(*src);
|
||||
cp++;
|
||||
src++;
|
||||
}
|
||||
*cp = *src;
|
||||
|
||||
return dst;
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
} // namespace ime_pinyin
|
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_UTF16CHAR_H__
|
||||
#define PINYINIME_INCLUDE_UTF16CHAR_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace ime_pinyin {

#ifdef __cplusplus
extern "C" {
#endif

// One UTF-16 code unit.
typedef unsigned short char16;

// Get a token from utf16_str (tokens are separated by space, newline or tab).
// Returned pointer is a '\0'-terminated utf16 string, or NULL.
// The input string is modified: the separator after the token is overwritten.
// *token_size receives the token length.
// *utf16_str_next returns the next part of the string for further tokenizing,
// or NULL when the end of the string was reached.
char16* utf16_strtok(char16 *utf16_str, size_t *token_size,
                     char16 **utf16_str_next);

// Parses an optionally '-'-prefixed run of decimal digits; 0 for NULL.
int utf16_atoi(const char16 *utf16_str);

// Parses a floating point number via atof(); strings of 256 code units or
// more yield 0.
float utf16_atof(const char16 *utf16_str);

// Number of code units before the terminator; 0 for NULL.
size_t utf16_strlen(const char16 *utf16_str);

// Code-unit-wise comparison; the result is the difference of the first
// differing pair, 0 when equal. The strncmp variant looks at no more than
// `size` units.
int utf16_strcmp(const char16 *str1, const char16 *str2);
int utf16_strncmp(const char16 *str1, const char16 *str2, size_t size);

// Copies src, terminator included, to dst. Returns dst, or NULL when an
// argument is NULL.
char16* utf16_strcpy(char16 *dst, const char16 *src);

// Copies at most `size` code units and stops after the terminator; dst is not
// padded. Returns dst, or NULL when an argument is NULL or size is 0.
char16* utf16_strncpy(char16 *dst, const char16 *src, size_t size);


// Copies src to dst, casting every code unit to char. Returns dst, or NULL
// when an argument is NULL.
char* utf16_strcpy_tochar(char *dst, const char16 *src);

#ifdef __cplusplus
}
#endif
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_UTF16CHAR_H__
|
|
@ -0,0 +1,131 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#include "utf16reader.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
#define MIN_BUF_LEN 128
|
||||
#define MAX_BUF_LEN 65535
|
||||
|
||||
// Creates a reader with no file attached and no buffer allocated.
Utf16Reader::Utf16Reader()
    : fp_(NULL),
      buffer_(NULL),
      buffer_total_len_(0),
      buffer_next_pos_(0),
      buffer_valid_len_(0) {
}
|
||||
|
||||
// Closes the file if one is still open and releases the read buffer.
Utf16Reader::~Utf16Reader() {
  if (fp_)
    fclose(fp_);

  // delete[] accepts a null pointer, so no separate check is needed.
  delete [] buffer_;
}
|
||||
|
||||
|
||||
bool Utf16Reader::open(const char* filename, size_t buffer_len) {
|
||||
if (filename == NULL)
|
||||
return false;
|
||||
|
||||
if (buffer_len < MIN_BUF_LEN)
|
||||
buffer_len = MIN_BUF_LEN;
|
||||
else if (buffer_len > MAX_BUF_LEN)
|
||||
buffer_len = MAX_BUF_LEN;
|
||||
|
||||
buffer_total_len_ = buffer_len;
|
||||
|
||||
if (NULL != buffer_)
|
||||
delete [] buffer_;
|
||||
buffer_ = new char16[buffer_total_len_];
|
||||
if (NULL == buffer_)
|
||||
return false;
|
||||
|
||||
if ((fp_ = fopen(filename, "rb")) == NULL)
|
||||
return false;
|
||||
|
||||
// the UTF16 file header, skip
|
||||
char16 header;
|
||||
if (fread(&header, sizeof(header), 1, fp_) != 1 || header != 0xfeff) {
|
||||
fclose(fp_);
|
||||
fp_ = NULL;
|
||||
return false;
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
// Reads the next line into read_buf, which has room for max_len code units
// including the terminator.
//
// The line ends at '\n' (not stored); a '\r' directly before it is dropped as
// well. Lines longer than max_len - 1 units are cut at that length. As
// before, the code unit at the cut position is consumed and not returned.
//
// Returns read_buf, or NULL when no file is open, an argument is invalid, or
// the end of the file is reached with nothing read.
//
// The truncation check counts the units already copied by earlier buffer
// refills (ret_len). It used to look at the position inside the current
// buffer only, which let a line spanning several refills write past the end
// of read_buf.
char16* Utf16Reader::readline(char16* read_buf, size_t max_len) {
  if (NULL == fp_ || NULL == read_buf || 0 == max_len)
    return NULL;

  // Units already stored in read_buf by previous passes of the loop.
  size_t ret_len = 0;

  do {
    if (buffer_valid_len_ == 0) {
      // Buffer exhausted: refill it from the file.
      buffer_next_pos_ = 0;
      buffer_valid_len_ = fread(buffer_, sizeof(char16),
                                buffer_total_len_, fp_);
      if (buffer_valid_len_ == 0) {
        // End of file (or read error): return what has been gathered, if any.
        if (0 == ret_len)
          return NULL;
        read_buf[ret_len] = (char16)'\0';
        return read_buf;
      }
    }

    for (size_t i = 0; i < buffer_valid_len_; i++) {
      if (ret_len + i == max_len - 1 ||
          buffer_[buffer_next_pos_ + i] == (char16)'\n') {
        if (ret_len + i > 0 && read_buf[ret_len + i - 1] == (char16)'\r') {
          read_buf[ret_len + i - 1] = (char16)'\0';
        } else {
          read_buf[ret_len + i] = (char16)'\0';
        }

        // Consume the unit that ended the line as well.
        i++;
        buffer_next_pos_ += i;
        buffer_valid_len_ -= i;
        if (buffer_next_pos_ == buffer_total_len_) {
          buffer_next_pos_ = 0;
          buffer_valid_len_ = 0;
        }
        return read_buf;
      } else {
        read_buf[ret_len + i] = buffer_[buffer_next_pos_ + i];
      }
    }

    // The whole buffer belonged to this line; keep going with the next one.
    ret_len += buffer_valid_len_;
    buffer_valid_len_ = 0;
  } while (true);

  // Never reach here
  return NULL;
}
|
||||
|
||||
// Closes the file (if open) and releases the read buffer. Always returns
// true; calling it on an already closed reader is harmless.
bool Utf16Reader::close() {
  if (fp_) {
    fclose(fp_);
  }
  fp_ = NULL;

  // delete[] accepts a null pointer, so no separate check is needed.
  delete [] buffer_;
  buffer_ = NULL;

  return true;
}
|
||||
} // namespace ime_pinyin
|
|
@ -0,0 +1,48 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_UTF16READER_H__
|
||||
#define PINYINIME_INCLUDE_UTF16READER_H__
|
||||
|
||||
#include <stdio.h>
|
||||
#include "./utf16char.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
class Utf16Reader {
|
||||
private:
|
||||
FILE *fp_;
|
||||
char16 *buffer_;
|
||||
size_t buffer_total_len_;
|
||||
size_t buffer_next_pos_;
|
||||
|
||||
// Always less than buffer_total_len_ - buffer_next_pos_
|
||||
size_t buffer_valid_len_;
|
||||
|
||||
public:
|
||||
Utf16Reader();
|
||||
~Utf16Reader();
|
||||
|
||||
// filename is the name of the file to open.
|
||||
// buffer_len specifies how long buffer should be allocated to speed up the
|
||||
// future reading
|
||||
bool open(const char* filename, size_t buffer_len);
|
||||
char16* readline(char16* read_buf, size_t max_len);
|
||||
bool close();
|
||||
};
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_UTF16READER_H__
|
Binary file not shown.
|
@ -0,0 +1,157 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_DICTDEF_H__
|
||||
#define PINYINIME_INCLUDE_DICTDEF_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./utf16char.h"
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
// Enable the following line when building the binary dictionary model.
|
||||
// #define ___BUILD_MODEL___
|
||||
|
||||
typedef unsigned char uint8;
|
||||
typedef unsigned short uint16;
|
||||
typedef unsigned int uint32;
|
||||
|
||||
typedef signed char int8;
|
||||
typedef short int16;
|
||||
typedef int int32;
|
||||
typedef long long int64;
|
||||
typedef unsigned long long uint64;
|
||||
|
||||
const bool kPrintDebug0 = false;
|
||||
const bool kPrintDebug1 = false;
|
||||
const bool kPrintDebug2 = false;
|
||||
|
||||
// The max length of a lemma.
|
||||
const size_t kMaxLemmaSize = 8;
|
||||
|
||||
// The max length of a Pinyin (spelling).
|
||||
const size_t kMaxPinyinSize = 6;
|
||||
|
||||
// The number of half spelling ids. For Chinese Pinyin, there 30 half ids.
|
||||
// See SpellingTrie.h for details.
|
||||
const size_t kHalfSpellingIdNum = 29;
|
||||
|
||||
// The maximum number of full spellings. For Chinese Pinyin, there are only
|
||||
// about 410 spellings.
|
||||
// If change this value is bigger(needs more bits), please also update
|
||||
// other structures like SpellingNode, to make sure than a spelling id can be
|
||||
// stored.
|
||||
// -1 is because that 0 is never used.
|
||||
const size_t kMaxSpellingNum = 512 - kHalfSpellingIdNum - 1;
|
||||
const size_t kMaxSearchSteps = 40;
|
||||
|
||||
// One character predicts its following characters.
|
||||
const size_t kMaxPredictSize = (kMaxLemmaSize - 1);
|
||||
|
||||
// LemmaIdType must always be size_t.
|
||||
typedef size_t LemmaIdType;
|
||||
const size_t kLemmaIdSize = 3; // Actually, a Id occupies 3 bytes in storage.
|
||||
const size_t kLemmaIdComposing = 0xffffff;
|
||||
|
||||
typedef uint16 LmaScoreType;
|
||||
typedef uint16 KeyScoreType;
|
||||
|
||||
// Number of items with highest score are kept for prediction purpose.
|
||||
const size_t kTopScoreLemmaNum = 10;
|
||||
|
||||
const size_t kMaxPredictNumByGt3 = 1;
|
||||
const size_t kMaxPredictNumBy3 = 2;
|
||||
const size_t kMaxPredictNumBy2 = 2;
|
||||
|
||||
// The last lemma id (included) for the system dictionary. The system
|
||||
// dictionary's ids always start from 1.
|
||||
const LemmaIdType kSysDictIdEnd = 500000;
|
||||
|
||||
// The first lemma id for the user dictionary.
|
||||
const LemmaIdType kUserDictIdStart = 500001;
|
||||
|
||||
// The last lemma id (included) for the user dictionary.
|
||||
const LemmaIdType kUserDictIdEnd = 600000;
|
||||
|
||||
typedef struct {
|
||||
uint16 half_splid:5;
|
||||
uint16 full_splid:11;
|
||||
} SpellingId, *PSpellingId;
|
||||
|
||||
|
||||
/**
|
||||
* We use different node types for different layers.
|
||||
* Statistical data of the building result for a testing dictionary:
|
||||
* root, level 0, level 1, level 2, level 3
|
||||
* max son num of one node: 406 280 41 2 -
|
||||
* max homo num of one node: 0 90 23 2 2
|
||||
* total node num of a layer: 1 406 31766 13516 993
|
||||
* total homo num of a layer: 9 5674 44609 12667 995
|
||||
*
|
||||
* The node number for root and level 0 won't be larger than 500.
|
||||
* According to the information above, two kinds of nodes can be used; one for
|
||||
* root and level 0, the other for the layers deeper than 0.
|
||||
*
|
||||
* LE = less than or equal: this node type is used for the root and level 0.
|
||||
* A node occupies 16 bytes (14 bytes of fields, presumably padded to 16 by alignment), so in total less than 16 * 500 = 8K.
|
||||
*/
|
||||
struct LmaNodeLE0 {
|
||||
uint32 son_1st_off; // presumably the offset of the first son node - TODO confirm
|
||||
uint32 homo_idx_buf_off; // presumably the offset into the homo index buffer - TODO confirm
|
||||
uint16 spl_idx;
|
||||
uint16 num_of_son; // number of son nodes
|
||||
uint16 num_of_homo; // number of homo words
|
||||
};
|
||||
|
||||
/**
|
||||
* GE = greater than or equal: this node type is used for layers 1 and deeper.
|
||||
* The fields add up to 10 bytes (three uint16 plus four unsigned char, assuming uint16 is a 16-bit type); the original note said 8 bytes.
|
||||
*/
|
||||
struct LmaNodeGE1 {
|
||||
uint16 son_1st_off_l; // Low bits of the son_1st_off
|
||||
uint16 homo_idx_buf_off_l; // Low bits of the homo_idx_buf_off
|
||||
uint16 spl_idx;
|
||||
unsigned char num_of_son; // number of son nodes
|
||||
unsigned char num_of_homo; // number of homo words
|
||||
unsigned char son_1st_off_h; // high bits of the son_1st_off
|
||||
unsigned char homo_idx_buf_off_h; // high bits of the homo_idx_buf_off
|
||||
};
|
||||
|
||||
#ifdef ___BUILD_MODEL___
|
||||
// Only compiled when ___BUILD_MODEL___ is defined.
struct SingleCharItem {
|
||||
float freq; // presumably the frequency of this character/spelling pair - TODO confirm
|
||||
char16 hz; // a single UTF-16 code unit; presumably the Hanzi - TODO confirm
|
||||
SpellingId splid; // packed half/full spelling id
|
||||
};
|
||||
|
||||
// Only compiled when ___BUILD_MODEL___ is defined. The array members are
// sized by kMaxLemmaSize (and kMaxPinyinSize for each Pinyin string).
struct LemmaEntry {
|
||||
LemmaIdType idx_by_py;
|
||||
LemmaIdType idx_by_hz;
|
||||
char16 hanzi_str[kMaxLemmaSize + 1]; // one extra slot, presumably for the terminator - TODO confirm
|
||||
|
||||
// The SingleCharItem id for each Hanzi.
|
||||
uint16 hanzi_scis_ids[kMaxLemmaSize];
|
||||
|
||||
uint16 spl_idx_arr[kMaxLemmaSize + 1];
|
||||
char pinyin_str[kMaxLemmaSize][kMaxPinyinSize + 1];
|
||||
unsigned char hz_str_len; // presumably the number of Hanzi in hanzi_str - TODO confirm
|
||||
float freq;
|
||||
};
|
||||
#endif // ___BUILD_MODEL___
|
||||
|
||||
} // namespace ime_pinyin
|
||||
|
||||
#endif // PINYINIME_INCLUDE_DICTDEF_H__
|
Binary file not shown.
|
@ -0,0 +1,223 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_ANDPYIME_H__
|
||||
#define PINYINIME_INCLUDE_ANDPYIME_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
#include "./dictdef.h"
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
/**
|
||||
* Open the decoder engine via the system and user dictionary file names.
|
||||
*
|
||||
* @param fn_sys_dict The file name of the system dictionary.
|
||||
* @param fn_usr_dict The file name of the user dictionary.
|
||||
* @return true if open the decoder engine successfully.
|
||||
*/
|
||||
bool im_open_decoder(const char *fn_sys_dict, const char *fn_usr_dict);
|
||||
|
||||
/**
|
||||
* Open the decoder engine via the system dictionary FD and user dictionary
|
||||
* file name. Because on Android, the system dictionary is embedded in the
|
||||
* whole application apk file.
|
||||
*
|
||||
* @param sys_fd The file in which the system dictionary is embedded.
|
||||
* @param start_offset The starting position of the system dictionary in the
|
||||
* file sys_fd.
|
||||
* @param length The length of the system dictionary in the file sys_fd,
|
||||
* counted in byte.
|
||||
* @return true if succeed.
|
||||
*/
|
||||
bool im_open_decoder_fd(int sys_fd, long start_offset, long length,
|
||||
const char *fn_usr_dict);
|
||||
|
||||
/**
|
||||
* Close the decoder engine.
|
||||
*/
|
||||
void im_close_decoder();
|
||||
|
||||
/**
|
||||
* Set maximum limitations for decoding. If this function is not called,
|
||||
* default values will be used. For example, due to screen size limitation,
|
||||
* the UI engine of the IME can only show a certain number of letters(input)
|
||||
* to decode, and a certain number of Chinese characters(output). If after
|
||||
* user adds a new letter, the input or the output string is longer than the
|
||||
* limitations, the engine will discard the recent letter.
|
||||
*
|
||||
* @param max_sps_len Maximum length of the spelling string(Pinyin string).
|
||||
* @param max_hzs_len Maximum length of the decoded Chinese character string.
|
||||
*/
|
||||
void im_set_max_lens(size_t max_sps_len, size_t max_hzs_len);
|
||||
|
||||
/**
|
||||
* Flush cached data to persistent memory. Because at runtime, in order to
|
||||
* achieve best performance, some data is only stored in memory.
|
||||
*/
|
||||
void im_flush_cache();
|
||||
|
||||
/**
|
||||
* Use a spelling string(Pinyin string) to search. The engine will try to do
|
||||
* an incremental search based on its previous search result, so if the new
|
||||
* string has the same prefix with the previous one stored in the decoder,
|
||||
* the decoder will only continue the search from the end of the prefix.
|
||||
* If the caller needs to do a brand new search, please call im_reset_search()
|
||||
* first. Calling im_search() is equivalent to calling im_add_letter() one by
|
||||
* one.
|
||||
*
|
||||
* @param sps_buf The spelling string buffer to decode.
|
||||
* @param sps_len The length of the spelling string buffer.
|
||||
* @return The number of candidates.
|
||||
*/
|
||||
size_t im_search(const char* sps_buf, size_t sps_len);
|
||||
|
||||
/**
|
||||
* Make a delete operation in the current search result, and redo the search if
|
||||
* necessary.
|
||||
*
|
||||
* @param pos The position of char in spelling string to delete, or the
|
||||
* position of spelling id in result string to delete.
|
||||
* @param is_pos_in_splid Indicate whether the pos parameter is the position
|
||||
* in the spelling string, or the position in the result spelling id string.
|
||||
* @return The number of candidates.
|
||||
*/
|
||||
size_t im_delsearch(size_t pos, bool is_pos_in_splid,
|
||||
bool clear_fixed_this_step);
|
||||
|
||||
/**
|
||||
* Reset the previous search result.
|
||||
*/
|
||||
void im_reset_search();
|
||||
|
||||
/**
|
||||
* Add a Pinyin letter to the current spelling string kept by decoder. If the
|
||||
* decoder fails in adding the letter, it will do nothing. im_get_sps_str()
|
||||
* can be used to get the spelling string kept by decoder currently.
|
||||
*
|
||||
* @param ch The letter to add.
|
||||
* @return The number of candidates.
|
||||
*/
|
||||
size_t im_add_letter(char ch);
|
||||
|
||||
/**
|
||||
* Get the spelling string kept by the decoder.
|
||||
*
|
||||
* @param decoded_len Used to return how many characters in the spelling
|
||||
* string is successfully parsed.
|
||||
* @return The spelling string kept by the decoder.
|
||||
*/
|
||||
const char *im_get_sps_str(size_t *decoded_len);
|
||||
|
||||
/**
|
||||
* Get a candidate(or choice) string.
|
||||
*
|
||||
* @param cand_id The id to get a candidate. Started from 0. Usually, id 0
|
||||
* is a sentence-level candidate.
|
||||
* @param cand_str The buffer to store the candidate.
|
||||
* @param max_len The maximum length of the buffer.
|
||||
* @return cand_str if succeeds, otherwise NULL.
|
||||
*/
|
||||
char16* im_get_candidate(size_t cand_id, char16* cand_str,
|
||||
size_t max_len);
|
||||
|
||||
/**
|
||||
* Get the segmentation information(the starting positions) of the spelling
|
||||
* string.
|
||||
*
|
||||
* @param spl_start Used to return the starting positions.
|
||||
* @return The number of spelling ids. If it is L, there will be L+1 valid
|
||||
* elements in spl_start, and spl_start[L] is the position after the end of
|
||||
* the last spelling id.
|
||||
*/
|
||||
size_t im_get_spl_start_pos(const uint16 *&spl_start);
|
||||
|
||||
/**
|
||||
* Choose a candidate and make it fixed. If the candidate does not match
|
||||
* the end of all spelling ids, new candidates will be provided from the
|
||||
* first unfixed position. If the candidate matches the end of the all
|
||||
* spelling ids, there will be only one new candidate, or the whole fixed
|
||||
* sentence.
|
||||
*
|
||||
* @param cand_id The id of candidate to select and make it fixed.
|
||||
* @return The number of candidates. If after the selection, the whole result
|
||||
* string has been fixed, there will be only one candidate.
|
||||
*/
|
||||
size_t im_choose(size_t cand_id);
|
||||
|
||||
/**
|
||||
* Cancel the last selection, or revert the last operation of im_choose().
|
||||
*
|
||||
* @return The number of candidates.
|
||||
*/
|
||||
size_t im_cancel_last_choice();
|
||||
|
||||
/**
|
||||
* Get the number of fixed spelling ids, or Chinese characters.
|
||||
*
|
||||
* @return The number of fixed spelling ids, or Chinese characters.
|
||||
*/
|
||||
size_t im_get_fixed_len();
|
||||
|
||||
/**
|
||||
* Cancel the input state and reset the search workspace.
|
||||
*/
|
||||
bool im_cancel_input();
|
||||
|
||||
/**
|
||||
* Get prediction candidates based on the given fixed Chinese string as the
|
||||
* history.
|
||||
*
|
||||
* @param his_buf The history buffer to do the prediction. It should be ended
|
||||
* with '\0'.
|
||||
* @param pre_buf Used to return prediction result list.
|
||||
* @return The number of predicted result string.
|
||||
*/
|
||||
size_t im_get_predicts(const char16 *his_buf,
|
||||
char16 (*&pre_buf)[kMaxPredictSize + 1]);
|
||||
|
||||
/**
|
||||
* Enable Shengmus in ShouZiMu mode.
|
||||
*/
|
||||
void im_enable_shm_as_szm(bool enable);
|
||||
|
||||
/**
|
||||
* Enable Yunmus in ShouZiMu mode.
|
||||
*/
|
||||
void im_enable_ym_as_szm(bool enable);
|
||||
|
||||
/**
|
||||
* Initializes or uninitializes the user dictionary.
|
||||
*
|
||||
* @param fn_usr_dict The file name of the user dictionary.
|
||||
*/
|
||||
void im_init_user_dictionary(const char *fn_usr_dict);
|
||||
|
||||
/**
|
||||
* Returns the current status of user dictionary.
|
||||
*/
|
||||
bool im_is_user_dictionary_enabled(void);
|
||||
}
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif // PINYINIME_INCLUDE_ANDPYIME_H__
|
|
@ -0,0 +1,56 @@
|
|||
/*
|
||||
* Copyright (C) 2009 The Android Open Source Project
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* http://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
#ifndef PINYINIME_INCLUDE_UTF16CHAR_H__
|
||||
#define PINYINIME_INCLUDE_UTF16CHAR_H__
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
namespace ime_pinyin {
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
typedef unsigned short char16;
|
||||
|
||||
// Get a token from utf16_str,
|
||||
// Returned pointer is a '\0'-terminated utf16 string, or NULL
|
||||
// *utf16_str_next returns the next part of the string for further tokenizing
|
||||
char16* utf16_strtok(char16 *utf16_str, size_t *token_size,
|
||||
char16 **utf16_str_next);
|
||||
|
||||
int utf16_atoi(const char16 *utf16_str);
|
||||
|
||||
float utf16_atof(const char16 *utf16_str);
|
||||
|
||||
size_t utf16_strlen(const char16 *utf16_str);
|
||||
|
||||
int utf16_strcmp(const char16 *str1, const char16 *str2);
|
||||
int utf16_strncmp(const char16 *str1, const char16 *str2, size_t size);
|
||||
|
||||
char16* utf16_strcpy(char16 *dst, const char16 *src);
|
||||
char16* utf16_strncpy(char16 *dst, const char16 *src, size_t size);
|
||||
|
||||
|
||||
char* utf16_strcpy_tochar(char *dst, const char16 *src);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
}
|
||||
|
||||
#endif // PINYINIME_INCLUDE_UTF16CHAR_H__
|
|
@ -0,0 +1,557 @@
|
|||
#include "keyboardform.h"
|
||||
#include <QLabel>
|
||||
#include <QPushButton>
|
||||
#include <QVBoxLayout>
|
||||
#include <QHBoxLayout>
|
||||
#include <QFontDatabase>
|
||||
#include <QFile>
|
||||
#include <QApplication>
|
||||
#include "pinyinime.h"
|
||||
|
||||
using namespace ime_pinyin;
|
||||
|
||||
#define chinesecharacters_number 7
|
||||
const char *keyboard_characters = "qwertyuiopasdfghjklzxcvbnm,.?";
|
||||
const QString keyboard_symbols[] = {"1", "2", "3", "4", "5", "6", "7", "8", "9", "0",
|
||||
"@", "#", "_", "\"", "“", "”", ",", ",", ".", "。",
|
||||
";", ";", ":", ":", "'", "’", "、", "!", "!",
|
||||
"~", "~", "+", "-", "*", "/", "=", "÷", "×", "√",
|
||||
"`", "?", "^", "&&", "%", "|", "(", ")", "(", ")",
|
||||
"[", "]", "【", "】", "{", "}", "<", ">", "《",
|
||||
"》", "$", "€", "£", "¢", "¥", "§", "—", "/", "\",
|
||||
"·", "……", "——", "→", "←", "↑", "↓", "■", "□", "●",
|
||||
"○", "『", "』", "「", "」", "★", "☆", "◆", "◇"}; //29*3
|
||||
|
||||
/**
 * Builds the on-screen keyboard: a pinyin/candidate strip on top and four
 * rows of keys below it, starting in English lower-case mode with the
 * pinyin strip hidden.
 *
 * The order of character_btns_list matters to the other methods: indexes
 * 0..28 are the character keys (26 letters followed by ",", "." and "?"),
 * then the upper/shift key, then the space bar.
 *
 * @param parent Optional parent widget.
 */
KeyboardForm::KeyboardForm(QWidget *parent)
    : QWidget(parent)
{
    character_btns_list.clear();
    current_mode = InputMode::en;
    // Give last_mode a defined value; it is otherwise only assigned when the
    // mode is switched.
    last_mode = InputMode::en;
    upper_mode = false;
    m_symbol_page = 0;

    this->setFixedSize(800,250);
    int keyboard_btn_width = this->width()/11.5;
    int keyboard_btn_height = this->height()/5.0;
    // Width of the wider function keys (backspace, enter, shift, ...).
    int wide_btn_width = keyboard_btn_width*1.5;

    // Main window appearance.
    this->setAttribute(Qt::WA_TranslucentBackground);
    this->setWindowFlags(Qt::Tool |
                         Qt::FramelessWindowHint |
                         Qt::WindowStaysOnTopHint |
                         Qt::WindowDoesNotAcceptFocus);

    // Load the QSS stylesheet. A missing stylesheet must not abort the
    // construction: the widgets below are dereferenced by every slot.
    QFile qss(":/styles/res/stylesheet.qss");
    if(qss.open(QFile::ReadOnly))
    {
        this->setStyleSheet(qss.readAll());
        qss.close();
    }

    // Icon font. Fall back to the default font when the resource cannot be
    // loaded, instead of indexing into an empty family list.
    int fontId = QFontDatabase::addApplicationFont(":/font/res/FontAwesome.otf");
    const auto fontFamilies = QFontDatabase::applicationFontFamilies(fontId);
    QFont btnicofont = fontFamilies.isEmpty() ? QFont() : QFont(fontFamilies.at(0));
    btnicofont.setPixelSize(10);

    // One horizontal layout per row: 0 = pinyin label, 1 = candidates,
    // 2..5 = keyboard rows.
    QHBoxLayout *hb[6];
    for(int i=0; i<6; i++)
    {
        hb[i] = new QHBoxLayout();
        hb[i]->setContentsMargins(0, 0, 0, 0);
        hb[i]->setSpacing(i == 1 ? 2 : 0);
    }

    widget_pinyin = new QWidget(this);
    widget_pinyin->setFixedHeight(keyboard_btn_height);
    // Label showing the pinyin typed so far.
    m_label_pinyin = new QLabel(this);
    m_label_pinyin->setFixedHeight(keyboard_btn_height*0.4);
    hb[0]->addWidget(m_label_pinyin);
    hb[0]->addStretch(1);

    // Candidate row: the first and last buttons page through the
    // candidates, the ones in between show the candidates themselves.
    for(int i=0; i<chinesecharacters_number; i++)
    {
        const bool is_first = (i == 0);
        const bool is_last = (i == chinesecharacters_number - 1);

        QPushButton *btn = new QPushButton(this);
        btn->setFixedHeight(keyboard_btn_height*0.6);
        btn->setFixedWidth(keyboard_btn_width);
        hb[1]->addWidget(btn);
        if(!is_last) hb[1]->addStretch(1);

        if(is_first || is_last)
        {
            change_chinese_characters_page_list.append(btn);
            btn->setSizePolicy(QSizePolicy::Fixed, QSizePolicy::Fixed);
            btn->setFont(btnicofont);
            btn->setText(QString(QChar(is_first ? 0xf0d9 : 0xf0da)));
            btn->setObjectName("hanzichangepage");
            if(is_first)
            {
                connect(btn, &QPushButton::clicked, this, &KeyboardForm::chineseCharactersUpdatePrevious);
            }
            else
            {
                connect(btn, &QPushButton::clicked, this, &KeyboardForm::chineseCharactersUpdateNext);
            }
        }
        else
        {
            chinese_characters_list.append(btn);
            btn->setObjectName("hanzicandidates");
            connect(btn, &QPushButton::clicked, this, &KeyboardForm::chineseCharactersSelected);
        }
    }

    QVBoxLayout *vb_pinyin = new QVBoxLayout(widget_pinyin);
    vb_pinyin->addLayout(hb[0]);
    vb_pinyin->addLayout(hb[1]);
    vb_pinyin->setContentsMargins(0, 0, 0, 0);
    vb_pinyin->setSpacing(0);

    widget_keyboard = new QWidget(this);
    widget_keyboard->setFixedHeight(keyboard_btn_height*4.0);

    // The 29 character keys.
    for(int i=0; i<29; i++)
    {
        QPushButton *btn = new QPushButton(QChar(keyboard_characters[i]),this);
        btn->setFixedSize(keyboard_btn_width, keyboard_btn_height);
        character_btns_list.append(btn);
        connect(btn, &QPushButton::clicked, this, &KeyboardForm::characterButtonClicked);
    }

    // Creates a fixed-size function key, appends it to the given row and
    // returns it so the caller can connect its slot.
    auto addFunctionKey = [&](QHBoxLayout *row, const QString &text, int width,
                              bool use_icon_font, const char *object_name) -> QPushButton* {
        QPushButton *key = new QPushButton(text);
        key->setFixedSize(width, keyboard_btn_height);
        if(use_icon_font) key->setFont(btnicofont);
        key->setObjectName(object_name);
        row->addWidget(key);
        return key;
    };

    // First key row: characters 0-9, then backspace.
    for(int i=0; i<10; i++)
    {
        hb[2]->addWidget(character_btns_list.at(i));
    }
    QPushButton *btn_backspace = addFunctionKey(hb[2], QChar(0xf060), wide_btn_width, true, "function_button");
    connect(btn_backspace, &QPushButton::clicked, this, &KeyboardForm::btnBackspaceClicked);

    // Second key row: characters 10-18, then Enter, centred by stretches.
    hb[3]->addStretch(1);
    for(int i=10; i<19; i++)
    {
        hb[3]->addWidget(character_btns_list.at(i));
    }
    QPushButton *btn_enter = addFunctionKey(hb[3], "Enter", wide_btn_width, false, "function_button");
    hb[3]->addStretch(1);
    connect(btn_enter, &QPushButton::clicked, this, &KeyboardForm::btnEnterClicked);

    // Third key row: upper/shift key, then characters 19-28.
    QPushButton *btn_upper = addFunctionKey(hb[4], QChar(0xf062), wide_btn_width, true, "function_button");
    connect(btn_upper, &QPushButton::clicked, this, &KeyboardForm::btnUpperClicked);
    for(int i=19; i<29; i++)
    {
        hb[4]->addWidget(character_btns_list.at(i));
    }
    character_btns_list.append(btn_upper);

    // Fourth row: function keys.
    QPushButton *btn_symbols = addFunctionKey(hb[5], ".?123", wide_btn_width, false, "function_button");
    connect(btn_symbols, &QPushButton::clicked, this, &KeyboardForm::btnSymbolsClicked);

    QPushButton *btn_language = addFunctionKey(hb[5], QChar(0xf0ac), keyboard_btn_width, true, "function_button");
    connect(btn_language, &QPushButton::clicked, this, &KeyboardForm::btnLanguageClicked);

    // The space bar takes the remaining width and doubles as the mode label.
    QPushButton *btn_blankspace = new QPushButton("English");
    btn_blankspace->setFixedHeight(keyboard_btn_height);
    hb[5]->addWidget(btn_blankspace);
    character_btns_list.append(btn_blankspace);
    connect(btn_blankspace, &QPushButton::clicked, this, &KeyboardForm::btnBlankspaceClicked);

    QPushButton *btn_emoji = addFunctionKey(hb[5], QChar(0xf118), keyboard_btn_width, true, "emoji");
    connect(btn_emoji, &QPushButton::clicked, this, &KeyboardForm::btnEmojiClicked);

    QPushButton *btn_hidekeyboard = addFunctionKey(hb[5], QString(QChar(0xf11c)).append(QChar(0xf103)),
                                                   wide_btn_width, true, "function_button");
    connect(btn_hidekeyboard, &QPushButton::clicked, this, &KeyboardForm::hideKeyboard);

    QVBoxLayout *vb_keyboard = new QVBoxLayout(widget_keyboard);
    vb_keyboard->setContentsMargins(0, 0, 0, 0);
    vb_keyboard->setSpacing(0);
    for(int i=2; i<6; i++)
    {
        vb_keyboard->addLayout(hb[i]);
    }

    QVBoxLayout *vb_system = new QVBoxLayout(this);
    vb_system->setContentsMargins(0, 0, 0, 0);
    vb_system->setSpacing(0);
    vb_system->addStretch(1);
    vb_system->addWidget(widget_pinyin);
    vb_system->addWidget(widget_keyboard);
    widget_pinyin->hide();

    updateButtonStateOfChineseCharacters();
}
|
||||
|
||||
void KeyboardForm::updateButtonStateOfChineseCharacters()
|
||||
{
|
||||
if(m_label_pinyin->text().isEmpty())
|
||||
{
|
||||
m_label_pinyin->setHidden(true);
|
||||
change_chinese_characters_page_list.at(0)->setHidden(true);
|
||||
change_chinese_characters_page_list.at(1)->setHidden(true);
|
||||
}
|
||||
else
|
||||
{
|
||||
m_label_pinyin->setHidden(false);
|
||||
change_chinese_characters_page_list.at(0)->setHidden(false);
|
||||
change_chinese_characters_page_list.at(1)->setHidden(false);
|
||||
}
|
||||
}
|
||||
|
||||
void KeyboardForm::chineseCharactersUpdatePrevious()
|
||||
{
|
||||
searchChineseCharacters(-1);
|
||||
}
|
||||
|
||||
void KeyboardForm::chineseCharactersUpdateNext()
|
||||
{
|
||||
searchChineseCharacters(1);
|
||||
}
|
||||
|
||||
void KeyboardForm::chineseCharactersSelected()
|
||||
{
|
||||
emit sendKeyToFocusItem(((QPushButton*)sender())->text());
|
||||
clearChineseCache();
|
||||
}
|
||||
|
||||
void KeyboardForm::btnBackspaceClicked()
|
||||
{
|
||||
if(current_mode != InputMode::zh || m_label_pinyin->text().isEmpty())
|
||||
{
|
||||
emit sendKeyToFocusItem("\x7F");
|
||||
}
|
||||
else
|
||||
{
|
||||
m_label_pinyin->setText(m_label_pinyin->text().left(m_label_pinyin->text().length()-1));
|
||||
if(m_label_pinyin->text().isEmpty())
|
||||
{
|
||||
clearChineseCache();
|
||||
}
|
||||
else
|
||||
{
|
||||
searchChineseCharacters(0);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void KeyboardForm::btnEnterClicked()
|
||||
{
|
||||
if(current_mode != InputMode::zh || m_label_pinyin->text().isEmpty())
|
||||
{
|
||||
emit sendKeyToFocusItem("\n");
|
||||
}
|
||||
else
|
||||
{
|
||||
emit sendKeyToFocusItem(m_label_pinyin->text());
|
||||
clearChineseCache();
|
||||
}
|
||||
}
|
||||
|
||||
void KeyboardForm::btnUpperClicked()
|
||||
{
|
||||
if(current_mode == InputMode::en)
|
||||
{
|
||||
upper_mode = !upper_mode;
|
||||
}
|
||||
else if(current_mode == InputMode::zh)
|
||||
{
|
||||
if(!m_label_pinyin->text().isEmpty() && m_label_pinyin->text().right(1).compare("'"))
|
||||
{
|
||||
m_label_pinyin->setText(m_label_pinyin->text().append("'"));
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(m_symbol_page == 0)
|
||||
{
|
||||
m_symbol_page = 1;
|
||||
character_btns_list.at(character_btns_list.length()-2)->setText("2/3");
|
||||
}
|
||||
else if(m_symbol_page == 1)
|
||||
{
|
||||
m_symbol_page = 2;
|
||||
character_btns_list.at(character_btns_list.length()-2)->setText("3/3");
|
||||
}
|
||||
else
|
||||
{
|
||||
m_symbol_page = 0;
|
||||
character_btns_list.at(character_btns_list.length()-2)->setText("1/3");
|
||||
}
|
||||
}
|
||||
updateKeyboard();
|
||||
}
|
||||
|
||||
void KeyboardForm::btnSymbolsClicked()
|
||||
{
|
||||
if(current_mode != InputMode::symb)
|
||||
{
|
||||
widget_pinyin->setHidden(true);
|
||||
if(current_mode == InputMode::en)
|
||||
{
|
||||
character_btns_list.at(character_btns_list.length()-1)->setText("Symbols");
|
||||
}
|
||||
else if(current_mode == InputMode::zh)
|
||||
{
|
||||
character_btns_list.at(character_btns_list.length()-1)->setText("符号");
|
||||
}
|
||||
((QPushButton*)sender())->setText("abc");
|
||||
last_mode = current_mode;
|
||||
current_mode = InputMode::symb;
|
||||
character_btns_list.at(character_btns_list.length()-2)->setText("1/3");
|
||||
}
|
||||
else
|
||||
{
|
||||
((QPushButton*)sender())->setText(".?123");
|
||||
current_mode = last_mode;
|
||||
m_symbol_page = 0;
|
||||
}
|
||||
upper_mode = false;
|
||||
updateKeyboard();
|
||||
}
|
||||
|
||||
void KeyboardForm::btnLanguageClicked()
|
||||
{
|
||||
upper_mode = false;
|
||||
if(current_mode == InputMode::zh)
|
||||
{
|
||||
current_mode = InputMode::en;
|
||||
}
|
||||
else if(current_mode == InputMode::en)
|
||||
{
|
||||
current_mode = InputMode::zh;
|
||||
}
|
||||
if(current_mode != InputMode::symb)
|
||||
{
|
||||
last_mode = current_mode;
|
||||
updateKeyboard();
|
||||
}
|
||||
}
|
||||
|
||||
void KeyboardForm::clearChineseCache()
|
||||
{
|
||||
m_label_pinyin->setText("");
|
||||
for(int i=0; i<chinese_characters_list.length(); i++)
|
||||
{
|
||||
chinese_characters_list.at(i)->setText("");
|
||||
}
|
||||
updateButtonStateOfChineseCharacters();
|
||||
}
|
||||
|
||||
void KeyboardForm::hideKeyboard()
|
||||
{
|
||||
clearChineseCache();
|
||||
this->hide();
|
||||
}
|
||||
|
||||
void KeyboardForm::updateKeyboard()
|
||||
{
|
||||
if(current_mode != InputMode::zh)
|
||||
{
|
||||
clearChineseCache();
|
||||
}
|
||||
if(current_mode == InputMode::symb)
|
||||
{
|
||||
character_btns_list.at(character_btns_list.length()-2)->setCheckable(false);
|
||||
for(int i=0; i<29; i++)
|
||||
{
|
||||
character_btns_list.at(i)->setText(keyboard_symbols[i + m_symbol_page*29]);
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
if(true == upper_mode && current_mode == InputMode::en)
|
||||
{
|
||||
character_btns_list.at(character_btns_list.length()-2)->setCheckable(true);
|
||||
character_btns_list.at(character_btns_list.length()-2)->setChecked(true);
|
||||
for(int i=0; i<26; i++)
|
||||
{
|
||||
character_btns_list.at(i)->setText(QChar(keyboard_characters[i]).toUpper());
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
for(int i=0; i<26; i++)
|
||||
{
|
||||
character_btns_list.at(i)->setText(QChar(keyboard_characters[i]));
|
||||
}
|
||||
}
|
||||
if(current_mode == InputMode::en)
|
||||
{
|
||||
widget_pinyin->setHidden(true);
|
||||
character_btns_list.at(character_btns_list.length()-5)->setText(",");
|
||||
character_btns_list.at(character_btns_list.length()-4)->setText(".");
|
||||
character_btns_list.at(character_btns_list.length()-3)->setText("?");
|
||||
character_btns_list.at(character_btns_list.length()-2)->setText(QChar(0xf062));
|
||||
character_btns_list.at(character_btns_list.length()-1)->setText("English");
|
||||
}
|
||||
else if(current_mode == InputMode::zh)
|
||||
{
|
||||
character_btns_list.at(character_btns_list.length()-2)->setCheckable(false);
|
||||
widget_pinyin->setHidden(false);
|
||||
character_btns_list.at(character_btns_list.length()-5)->setText(",");
|
||||
character_btns_list.at(character_btns_list.length()-4)->setText("。");
|
||||
character_btns_list.at(character_btns_list.length()-3)->setText("?");
|
||||
character_btns_list.at(character_btns_list.length()-2)->setText("分词");
|
||||
character_btns_list.at(character_btns_list.length()-1)->setText("拼音");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
void KeyboardForm::btnBlankspaceClicked()
|
||||
{
|
||||
if(current_mode != InputMode::zh || m_label_pinyin->text().isEmpty())
|
||||
{
|
||||
emit sendKeyToFocusItem(" ");
|
||||
}
|
||||
else
|
||||
{
|
||||
emit sendKeyToFocusItem(chinese_characters_list.at(0)->text());
|
||||
clearChineseCache();
|
||||
}
|
||||
}
|
||||
|
||||
void KeyboardForm::btnEmojiClicked()
|
||||
{
|
||||
emit sendKeyToFocusItem("::)");
|
||||
}
|
||||
|
||||
void KeyboardForm::characterButtonClicked()
|
||||
{
|
||||
if(current_mode == InputMode::zh)
|
||||
{
|
||||
if(((QPushButton*)sender())->text() == "," || ((QPushButton*)sender())->text() == "。" || ((QPushButton*)sender())->text() == "?")
|
||||
{
|
||||
emit sendKeyToFocusItem(((QPushButton*)sender())->text());
|
||||
}
|
||||
else
|
||||
{
|
||||
if(m_label_pinyin->text().length()<15)
|
||||
{
|
||||
m_label_pinyin->setText(m_label_pinyin->text().append(((QPushButton*)sender())->text()));
|
||||
searchChineseCharacters(0);
|
||||
updateButtonStateOfChineseCharacters();
|
||||
}
|
||||
}
|
||||
}
|
||||
else
|
||||
{
|
||||
emit sendKeyToFocusItem(((QPushButton*)sender())->text());
|
||||
}
|
||||
}
|
||||
|
||||
void KeyboardForm::searchChineseCharacters(const int ¤tpage)
|
||||
{
|
||||
const int max_spelling_length = 32;
|
||||
const int max_decoded_length = 32;
|
||||
const int max_single_hanzi = 20;
|
||||
static unsigned int page_change_times = 0;
|
||||
|
||||
QString app_dir(qApp->applicationDirPath()+"/dict");
|
||||
im_open_decoder(QString("%1/dict_pinyin.dat").arg(app_dir).toLocal8Bit().data(),
|
||||
QString("%1/dict_pinyin_user.dat").arg(app_dir).toLocal8Bit().data());
|
||||
im_set_max_lens(max_spelling_length, max_decoded_length);
|
||||
im_reset_search();
|
||||
|
||||
QByteArray bytearray(m_label_pinyin->text().toUtf8());
|
||||
char *pinyin(bytearray.data());
|
||||
size_t cand_num = im_search(pinyin, bytearray.size());
|
||||
|
||||
size_t decode_len;
|
||||
im_get_sps_str(&decode_len);
|
||||
if (decode_len == 1)
|
||||
{
|
||||
if (cand_num > 10) cand_num = 10;
|
||||
}
|
||||
else
|
||||
{
|
||||
size_t single = 0;
|
||||
size_t multi = 0;
|
||||
char16 *cand_buf = new char16[max_decoded_length];
|
||||
for(size_t i = 0; i < cand_num; i++)
|
||||
{
|
||||
im_get_candidate(i, cand_buf, max_decoded_length);
|
||||
if (strlen((char *)cand_buf) > 2)
|
||||
{
|
||||
multi++;
|
||||
}
|
||||
else
|
||||
{
|
||||
single++;
|
||||
if (single > max_single_hanzi) break;
|
||||
}
|
||||
}
|
||||
cand_num = multi + single;
|
||||
delete cand_buf;
|
||||
}
|
||||
|
||||
switch(currentpage)
|
||||
{
|
||||
case 1:
|
||||
if(cand_num > chinese_characters_list.length() && page_change_times < cand_num - chinese_characters_list.length())
|
||||
page_change_times++;
|
||||
break;
|
||||
case -1:
|
||||
if(page_change_times > 0) page_change_times--;
|
||||
break;
|
||||
default:
|
||||
page_change_times = 0;
|
||||
break;
|
||||
}
|
||||
if(0 == page_change_times)
|
||||
change_chinese_characters_page_list.at(0)->setEnabled(false);
|
||||
else
|
||||
change_chinese_characters_page_list.at(0)->setEnabled(true);
|
||||
if(page_change_times == cand_num - chinese_characters_list.length())
|
||||
change_chinese_characters_page_list.at(1)->setEnabled(false);
|
||||
else
|
||||
change_chinese_characters_page_list.at(1)->setEnabled(true);
|
||||
|
||||
char16 *cand_buf = new char16[max_decoded_length];
|
||||
char16 *cand;
|
||||
QString cand_str;
|
||||
for (unsigned i = 0; i < cand_num; i++)
|
||||
{
|
||||
cand = im_get_candidate(i, cand_buf, max_decoded_length);
|
||||
if (cand)
|
||||
{
|
||||
cand_str = QString::fromUtf16(cand);
|
||||
if (i == 0) cand_str.remove(0, im_get_fixed_len());
|
||||
}
|
||||
else
|
||||
{
|
||||
cand_str = "";
|
||||
}
|
||||
int tmpindex = i - page_change_times;
|
||||
if(tmpindex >= 0 && tmpindex < chinese_characters_list.length())
|
||||
{
|
||||
switch(currentpage)
|
||||
{
|
||||
case 1:
|
||||
chinese_characters_list.at(tmpindex)->setText(cand_str);
|
||||
break;
|
||||
case -1:
|
||||
chinese_characters_list.at(tmpindex)->setText(cand_str);
|
||||
break;
|
||||
default:
|
||||
chinese_characters_list.at(tmpindex)->setText(cand_str);
|
||||
break;
|
||||
}
|
||||
}
|
||||
}
|
||||
delete cand_buf;
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
#ifndef KEYBOARDFORM_H
|
||||
#define KEYBOARDFORM_H
|
||||
|
||||
#include <QWidget>
|
||||
class QPushButton;
|
||||
class QLabel;
|
||||
|
||||
class KeyboardForm : public QWidget
|
||||
{
|
||||
Q_OBJECT
|
||||
|
||||
public:
|
||||
KeyboardForm(QWidget *parent = 0);
|
||||
void clearChineseCache();
|
||||
|
||||
private:
|
||||
void chineseCharactersUpdatePrevious();
|
||||
void chineseCharactersUpdateNext();
|
||||
void chineseCharactersSelected();
|
||||
void btnBackspaceClicked();
|
||||
void btnEnterClicked();
|
||||
void btnUpperClicked();
|
||||
void btnSymbolsClicked();
|
||||
void btnLanguageClicked();
|
||||
void btnBlankspaceClicked();
|
||||
void btnEmojiClicked();
|
||||
void characterButtonClicked();
|
||||
void updateKeyboard();
|
||||
void updateButtonStateOfChineseCharacters();
|
||||
void searchChineseCharacters(const int ¤tpage);
|
||||
void hideKeyboard();
|
||||
|
||||
QList<QPushButton*> character_btns_list, chinese_characters_list, change_chinese_characters_page_list;
|
||||
enum InputMode{zh, en, symb};
|
||||
InputMode current_mode, last_mode;
|
||||
QWidget *widget_keyboard, *widget_pinyin;
|
||||
bool upper_mode;
|
||||
QLabel *m_label_pinyin;
|
||||
int m_symbol_page;
|
||||
|
||||
signals:
|
||||
void sendKeyToFocusItem(const QString &keytext);
|
||||
};
|
||||
|
||||
#endif // KEYBOARDFORM_H
|
|
@ -0,0 +1,39 @@
|
|||
QT = core gui-private widgets
|
||||
|
||||
TEMPLATE = lib
|
||||
TARGET = tgtsmlInputContextPlugin
|
||||
|
||||
HEADERS += \
|
||||
tgtsmlplatforminputcontextplugin.h \
|
||||
tgtsmlplatforminputcontext.h \
|
||||
keyboardform.h
|
||||
|
||||
SOURCES += \
|
||||
tgtsmlplatforminputcontextplugin.cpp \
|
||||
tgtsmlplatforminputcontext.cpp \
|
||||
keyboardform.cpp
|
||||
|
||||
RESOURCES += \
|
||||
res.qrc
|
||||
|
||||
INCLUDEPATH += $$PWD/googlepinyin
|
||||
|
||||
win32{
|
||||
CONFIG += debug_and_release build_all
|
||||
|
||||
CONFIG(debug, debug|release){
|
||||
TARGET = ../../testWindow/debug/platformInputContexts/$$join(TARGET,,,d)
|
||||
LIBS += -L$$PWD/googlepinyin/ -lgooglepinyind
|
||||
}CONFIG(release, debug|release){
|
||||
TARGET = ../../testWindow/release/platformInputContexts/$$TARGET
|
||||
LIBS += -L$$PWD/googlepinyin/ -lgooglepinyin
|
||||
}
|
||||
}
|
||||
unix{
|
||||
TARGET = ../target/$$TARGET
|
||||
LIBS += -L$$PWD/googlepinyin/ -lgooglepinyin
|
||||
MOC_DIR = ../tmpfiles
|
||||
RCC_DIR = ../tmpfiles
|
||||
UI_DIR = ../tmpfiles
|
||||
OBJECTS_DIR = ../tmpfiles
|
||||
}
|
|
@ -0,0 +1,8 @@
|
|||
<RCC>
|
||||
<qresource prefix="/styles">
|
||||
<file>res/stylesheet.qss</file>
|
||||
</qresource>
|
||||
<qresource prefix="/font">
|
||||
<file>res/FontAwesome.otf</file>
|
||||
</qresource>
|
||||
</RCC>
|
Binary file not shown.
|
@ -0,0 +1,68 @@
|
|||
QWidget{
|
||||
background-color:black;
|
||||
}
|
||||
|
||||
QPushButton{
|
||||
font:25px;
|
||||
background-color:gray;
|
||||
color:white;
|
||||
border:2px solid black;
|
||||
border-radius:10px;
|
||||
}
|
||||
|
||||
QPushButton#function_button{
|
||||
background-color:rgb(80,80,80);
|
||||
color:rgb(200,200,200);
|
||||
font:20px;
|
||||
}
|
||||
|
||||
QPushButton:hover{
|
||||
background-color:green;
|
||||
color:white;
|
||||
}
|
||||
|
||||
QPushButton:pressed,
|
||||
QPushButton#function_button::pressed{
|
||||
background-color:red;
|
||||
color:white;
|
||||
}
|
||||
|
||||
QPushButton#function_button:checked{
|
||||
background-color:rgb(200,100,50);
|
||||
color:yellow;
|
||||
}
|
||||
|
||||
QPushButton#hanzichangepage,
|
||||
QPushButton#hanzicandidates{
|
||||
background-color:transparent;
|
||||
color:white;
|
||||
}
|
||||
|
||||
QPushButton#hanzichangepage:pressed{
|
||||
color:red;
|
||||
}
|
||||
|
||||
QPushButton#hanzichangepage:disabled{
|
||||
color:gray;
|
||||
}
|
||||
|
||||
QPushButton#emoji{
|
||||
background-color:rgb(80,80,80);
|
||||
color:yellow;
|
||||
}
|
||||
|
||||
QPushButton#emoji:hover{
|
||||
color:red;
|
||||
}
|
||||
|
||||
QPushButton#emoji:pressed{
|
||||
color:blue;
|
||||
}
|
||||
|
||||
QLabel{
|
||||
background-color:rgba(255,255,255,80);
|
||||
border-radius:2px;
|
||||
color:white;
|
||||
font:15px;
|
||||
margin-left: 5px;
|
||||
}
|
|
@ -0,0 +1,3 @@
|
|||
{
|
||||
"Keys": [ "tgtsml" ]
|
||||
}
|
|
@ -0,0 +1,99 @@
|
|||
#include "tgtsmlplatforminputcontext.h"
|
||||
#include <QCoreApplication>
|
||||
#include <QKeyEvent>
|
||||
#include <QApplication>
|
||||
#include <QDesktopWidget>
|
||||
|
||||
/// Starts with no keyboard widget and no focus object; the keyboard is only
/// created on the first showInputPanel() call.
TgtsmlPlatformInputContext::TgtsmlPlatformInputContext()
    : m_keyboard(nullptr)
    , m_focusitem(nullptr)
{
}
|
||||
|
||||
/// Tears down the keyboard widget, if one was ever created.
TgtsmlPlatformInputContext::~TgtsmlPlatformInputContext()
{
    // m_keyboard stays null until showInputPanel() has run at least once, so
    // the connection may only be touched when the widget actually exists;
    // calling disconnect() with a null sender triggers a Qt runtime warning.
    if (m_keyboard) {
        disconnect(m_keyboard, &KeyboardForm::sendKeyToFocusItem,
                   this, &TgtsmlPlatformInputContext::sendKeyToFocusItem);
        delete m_keyboard;
        m_keyboard = nullptr;
    }
}
|
||||
|
||||
void TgtsmlPlatformInputContext::sendKeyToFocusItem(const QString &keytext)
|
||||
{
|
||||
if(!m_focusitem)return;
|
||||
|
||||
if(keytext == QString("\x7F")) //Backspace <--
|
||||
{
|
||||
QCoreApplication::sendEvent(m_focusitem, new QKeyEvent(QEvent::KeyPress, Qt::Key_Backspace, Qt::NoModifier));
|
||||
QCoreApplication::sendEvent(m_focusitem, new QKeyEvent(QEvent::KeyRelease, Qt::Key_Backspace, Qt::NoModifier));
|
||||
}
|
||||
else if(keytext == QString("\n"))
|
||||
{
|
||||
QCoreApplication::sendEvent(m_focusitem, new QKeyEvent(QEvent::KeyPress, Qt::Key_Return, Qt::NoModifier));
|
||||
QCoreApplication::sendEvent(m_focusitem, new QKeyEvent(QEvent::KeyRelease, Qt::Key_Return, Qt::NoModifier));
|
||||
}
|
||||
else if(keytext == QString("&&"))
|
||||
{
|
||||
QCoreApplication::sendEvent(m_focusitem, new QKeyEvent(QEvent::KeyPress, 0, Qt::NoModifier, "&"));
|
||||
QCoreApplication::sendEvent(m_focusitem, new QKeyEvent(QEvent::KeyRelease, 0, Qt::NoModifier, "&"));
|
||||
}
|
||||
else
|
||||
{
|
||||
QCoreApplication::sendEvent(m_focusitem, new QKeyEvent(QEvent::KeyPress, 0, Qt::NoModifier, keytext));
|
||||
QCoreApplication::sendEvent(m_focusitem, new QKeyEvent(QEvent::KeyRelease, 0, Qt::NoModifier, keytext));
|
||||
}
|
||||
}
|
||||
|
||||
bool TgtsmlPlatformInputContext::isValid() const
|
||||
{
|
||||
return true;
|
||||
}
|
||||
|
||||
/// Remembers the object that key events and panel positioning should target.
/// @param object New focus object; stored as-is (may be null).
void TgtsmlPlatformInputContext::setFocusObject(QObject *object) { m_focusitem = object; }
|
||||
|
||||
void TgtsmlPlatformInputContext::showInputPanel()
|
||||
{
|
||||
if(!m_keyboard){
|
||||
m_keyboard = new KeyboardForm;
|
||||
connect(m_keyboard, &KeyboardForm::sendKeyToFocusItem, this, &TgtsmlPlatformInputContext::sendKeyToFocusItem);
|
||||
}
|
||||
if(m_keyboard->isHidden())m_keyboard->show();
|
||||
QWidget *widgetTmp = qobject_cast<QWidget*>(m_focusitem);
|
||||
if(widgetTmp){
|
||||
QPoint widgetGlobalPos = widgetTmp->mapToGlobal(QPoint(0, 0));
|
||||
if(widgetGlobalPos.x() < 0){
|
||||
widgetGlobalPos.setX(0);
|
||||
}
|
||||
if(widgetGlobalPos.y() < 0){
|
||||
widgetGlobalPos.setY(0);
|
||||
}
|
||||
if(qApp->desktop()->width() - widgetGlobalPos.x() < m_keyboard->width()){
|
||||
widgetGlobalPos.setX(qApp->desktop()->width() - m_keyboard->width());
|
||||
}
|
||||
if(qApp->desktop()->height() - widgetGlobalPos.y() - 30 < m_keyboard->height()){
|
||||
widgetGlobalPos.setY(widgetGlobalPos.y() - m_keyboard->height() - 10);
|
||||
}
|
||||
else{
|
||||
widgetGlobalPos = widgetGlobalPos + QPoint(0,30);
|
||||
}
|
||||
m_keyboard->move(widgetGlobalPos);
|
||||
}
|
||||
}
|
||||
|
||||
void TgtsmlPlatformInputContext::hideInputPanel()
|
||||
{
|
||||
if(!m_keyboard){
|
||||
return;
|
||||
}
|
||||
if(!m_keyboard->isHidden()){
|
||||
m_keyboard->hide();
|
||||
}
|
||||
m_keyboard->clearChineseCache();
|
||||
}
|
||||
|
||||
bool TgtsmlPlatformInputContext::isInputPanelVisible() const
|
||||
{
|
||||
return m_keyboard->isVisible();
|
||||
}
|
||||
|
|
@ -0,0 +1,27 @@
|
|||
#ifndef TGTSMLPLATFORMINPUTCONTEXT_H
|
||||
#define TGTSMLPLATFORMINPUTCONTEXT_H
|
||||
|
||||
#include <qpa/qplatforminputcontext.h>
|
||||
#include "keyboardform.h"
|
||||
|
||||
// Platform input context that drives the KeyboardForm on-screen keyboard and
// forwards the keys it emits to the current focus object.
class TgtsmlPlatformInputContext : public QPlatformInputContext
|
||||
{
|
||||
Q_OBJECT
|
||||
public:
|
||||
TgtsmlPlatformInputContext();
|
||||
~TgtsmlPlatformInputContext();
|
||||
|
||||
// QPlatformInputContext overrides.
bool isValid() const Q_DECL_OVERRIDE;
|
||||
void setFocusObject(QObject *object) Q_DECL_OVERRIDE;
|
||||
void showInputPanel() Q_DECL_OVERRIDE;
|
||||
void hideInputPanel() Q_DECL_OVERRIDE;
|
||||
bool isInputPanelVisible() const Q_DECL_OVERRIDE;
|
||||
|
||||
private:
|
||||
// Connected to KeyboardForm::sendKeyToFocusItem; sends keytext to m_focusitem as key events.
void sendKeyToFocusItem(const QString &keytext);
|
||||
|
||||
// Keyboard widget; null until the first showInputPanel() call, deleted in the destructor.
KeyboardForm *m_keyboard;
|
||||
// Object last passed to setFocusObject(); may be null.
QObject *m_focusitem;
|
||||
};
|
||||
|
||||
#endif // TGTSMLPLATFORMINPUTCONTEXT_H
|
|
@ -0,0 +1,12 @@
|
|||
#include "tgtsmlplatforminputcontextplugin.h"
|
||||
|
||||
/**
 * Plugin factory entry point.
 *
 * @param key       Input-method key requested by Qt (e.g. from QT_IM_MODULE).
 * @param paramlist Extra parameters; unused.
 * @return A new TgtsmlPlatformInputContext when key equals "tgtsml"
 *         (case-insensitive), otherwise nullptr.
 */
TgtsmlPlatformInputContext * TgtsmlPlatformInputContextPlugin::create(const QString &key, const QStringList &paramlist)
{
    Q_UNUSED(paramlist)

    if (key.compare("tgtsml", Qt::CaseInsensitive) == 0) {
        return new TgtsmlPlatformInputContext;
    }
    return nullptr;
}
|
|
@ -0,0 +1,16 @@
|
|||
#ifndef TGTSMLPLATFORMINPUTCONTEXTPLUGIN_H
|
||||
#define TGTSMLPLATFORMINPUTCONTEXTPLUGIN_H
|
||||
|
||||
#include <qpa/qplatforminputcontextplugin_p.h>
|
||||
#include "tgtsmlplatforminputcontext.h"
|
||||
|
||||
class TgtsmlPlatformInputContextPlugin : public QPlatformInputContextPlugin
|
||||
{
|
||||
Q_OBJECT
|
||||
Q_PLUGIN_METADATA(IID QPlatformInputContextFactoryInterface_iid FILE "./res/tgtsml.json")
|
||||
|
||||
public:
|
||||
TgtsmlPlatformInputContext *create(const QString &key, const QStringList ¶mlist);
|
||||
};
|
||||
|
||||
#endif // TGTSMLPLATFORMINPUTCONTEXTPLUGIN_H
|
|
@ -0,0 +1,13 @@
|
|||
#include "mainwindow.h"
|
||||
|
||||
#include <QApplication>
|
||||
|
||||
int main(int argc, char *argv[])
|
||||
{
|
||||
//qputenv("QT_IM_MODULE", QByteArray("tgtsml"));
|
||||
//qputenv("QT_IM_MODULE", QByteArray("qtvirtualkeyboard"));
|
||||
QApplication a(argc, argv);
|
||||
MainWindow w;
|
||||
w.show();
|
||||
return a.exec();
|
||||
}
|
|
@ -0,0 +1,15 @@
|
|||
#include "mainwindow.h"
|
||||
#include "ui_mainwindow.h"
|
||||
|
||||
/// Builds the window and instantiates the widgets generated from mainwindow.ui.
/// @param parent Optional parent widget, forwarded to QMainWindow.
MainWindow::MainWindow(QWidget *parent) : QMainWindow(parent), ui(new Ui::MainWindow)
{
    ui->setupUi(this);
}
|
||||
|
||||
/// Releases the generated UI object allocated in the constructor.
MainWindow::~MainWindow() { delete ui; }
|
||||
|
|
@ -0,0 +1,21 @@
|
|||
#ifndef MAINWINDOW_H
|
||||
#define MAINWINDOW_H
|
||||
|
||||
#include <QMainWindow>
|
||||
|
||||
QT_BEGIN_NAMESPACE
|
||||
namespace Ui { class MainWindow; }
|
||||
QT_END_NAMESPACE
|
||||
|
||||
// Main window of the test application; its widgets come from the generated Ui::MainWindow form.
class MainWindow : public QMainWindow
|
||||
{
|
||||
Q_OBJECT
|
||||
|
||||
public:
|
||||
// parent is forwarded to QMainWindow.
MainWindow(QWidget *parent = nullptr);
|
||||
~MainWindow();
|
||||
|
||||
private:
|
||||
// Generated form object; allocated in the constructor and deleted in the destructor.
Ui::MainWindow *ui;
|
||||
};
|
||||
#endif // MAINWINDOW_H
|
|
@ -0,0 +1,158 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<ui version="4.0">
|
||||
<class>MainWindow</class>
|
||||
<widget class="QMainWindow" name="MainWindow">
|
||||
<property name="geometry">
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>800</width>
|
||||
<height>600</height>
|
||||
</rect>
|
||||
</property>
|
||||
<property name="windowTitle">
|
||||
<string>MainWindow</string>
|
||||
</property>
|
||||
<widget class="QWidget" name="centralwidget">
|
||||
<layout class="QGridLayout" name="gridLayout">
|
||||
<item row="0" column="0">
|
||||
<widget class="QLineEdit" name="lineEdit"/>
|
||||
</item>
|
||||
<item row="0" column="1">
|
||||
<widget class="QLineEdit" name="lineEdit_2"/>
|
||||
</item>
|
||||
<item row="0" column="4" rowspan="2">
|
||||
<widget class="QTextEdit" name="textEdit_2"/>
|
||||
</item>
|
||||
<item row="1" column="0" rowspan="10">
|
||||
<widget class="QTextEdit" name="textEdit"/>
|
||||
</item>
|
||||
<item row="1" column="1">
|
||||
<widget class="QPushButton" name="pushButton_8">
|
||||
<property name="text">
|
||||
<string>PushButton</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="2" column="1">
|
||||
<widget class="QLineEdit" name="lineEdit_5"/>
|
||||
</item>
|
||||
<item row="2" column="4" rowspan="5">
|
||||
<widget class="QTextEdit" name="textEdit_3"/>
|
||||
</item>
|
||||
<item row="3" column="1">
|
||||
<widget class="QLineEdit" name="lineEdit_6"/>
|
||||
</item>
|
||||
<item row="4" column="1">
|
||||
<widget class="QPushButton" name="pushButton_2">
|
||||
<property name="text">
|
||||
<string>PushButton</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="5" column="1">
|
||||
<widget class="QPushButton" name="pushButton">
|
||||
<property name="text">
|
||||
<string>PushButton</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="6" column="1">
|
||||
<widget class="QLineEdit" name="lineEdit_7"/>
|
||||
</item>
|
||||
<item row="7" column="1">
|
||||
<widget class="QLineEdit" name="lineEdit_4"/>
|
||||
</item>
|
||||
<item row="7" column="2" rowspan="5">
|
||||
<widget class="QTextEdit" name="textEdit_4"/>
|
||||
</item>
|
||||
<item row="7" column="3" rowspan="5">
|
||||
<widget class="QTextEdit" name="textEdit_5"/>
|
||||
</item>
|
||||
<item row="7" column="4" rowspan="5">
|
||||
<widget class="QTextEdit" name="textEdit_6"/>
|
||||
</item>
|
||||
<item row="8" column="1">
|
||||
<widget class="QLineEdit" name="lineEdit_8"/>
|
||||
</item>
|
||||
<item row="9" column="1">
|
||||
<widget class="QPushButton" name="pushButton_3">
|
||||
<property name="text">
|
||||
<string>PushButton</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="10" column="1">
|
||||
<widget class="QLineEdit" name="lineEdit_9"/>
|
||||
</item>
|
||||
<item row="11" column="0">
|
||||
<widget class="QSpinBox" name="spinBox"/>
|
||||
</item>
|
||||
<item row="11" column="1">
|
||||
<widget class="QPushButton" name="pushButton_4">
|
||||
<property name="text">
|
||||
<string>PushButton</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="12" column="0">
|
||||
<widget class="QLineEdit" name="lineEdit_3"/>
|
||||
</item>
|
||||
<item row="12" column="1">
|
||||
<widget class="QLineEdit" name="lineEdit_10"/>
|
||||
</item>
|
||||
<item row="12" column="2" rowspan="5" colspan="3">
|
||||
<widget class="QTextBrowser" name="textBrowser"/>
|
||||
</item>
|
||||
<item row="13" column="0">
|
||||
<widget class="QDoubleSpinBox" name="doubleSpinBox"/>
|
||||
</item>
|
||||
<item row="13" column="1">
|
||||
<widget class="QLineEdit" name="lineEdit_11"/>
|
||||
</item>
|
||||
<item row="14" column="0">
|
||||
<widget class="QTimeEdit" name="timeEdit"/>
|
||||
</item>
|
||||
<item row="14" column="1">
|
||||
<widget class="QPushButton" name="pushButton_6">
|
||||
<property name="text">
|
||||
<string>PushButton</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="15" column="0">
|
||||
<widget class="QDateTimeEdit" name="dateTimeEdit"/>
|
||||
</item>
|
||||
<item row="15" column="1">
|
||||
<widget class="QPushButton" name="pushButton_7">
|
||||
<property name="text">
|
||||
<string>PushButton</string>
|
||||
</property>
|
||||
</widget>
|
||||
</item>
|
||||
<item row="16" column="0">
|
||||
<widget class="QDateEdit" name="dateEdit"/>
|
||||
</item>
|
||||
<item row="16" column="1">
|
||||
<widget class="QLineEdit" name="lineEdit_12"/>
|
||||
</item>
|
||||
<item row="0" column="2" rowspan="7" colspan="2">
|
||||
<widget class="QListView" name="listView"/>
|
||||
</item>
|
||||
</layout>
|
||||
</widget>
|
||||
<widget class="QMenuBar" name="menubar">
|
||||
<property name="geometry">
|
||||
<rect>
|
||||
<x>0</x>
|
||||
<y>0</y>
|
||||
<width>800</width>
|
||||
<height>23</height>
|
||||
</rect>
|
||||
</property>
|
||||
</widget>
|
||||
<widget class="QStatusBar" name="statusbar"/>
|
||||
</widget>
|
||||
<resources/>
|
||||
<connections/>
|
||||
</ui>
|
|
@ -0,0 +1,34 @@
|
|||
QT += core gui
|
||||
|
||||
greaterThan(QT_MAJOR_VERSION, 4): QT += widgets
|
||||
|
||||
CONFIG += c++11
|
||||
|
||||
# The following define makes your compiler emit warnings if you use
|
||||
# any Qt feature that has been marked deprecated (the exact warnings
|
||||
# depend on your compiler). Please consult the documentation of the
|
||||
# deprecated API in order to know how to port your code away from it.
|
||||
DEFINES += QT_DEPRECATED_WARNINGS
|
||||
|
||||
# You can also make your code fail to compile if it uses deprecated APIs.
|
||||
# In order to do so, uncomment the following line.
|
||||
# You can also select to disable deprecated APIs only up to a certain version of Qt.
|
||||
#DEFINES += QT_DISABLE_DEPRECATED_BEFORE=0x060000 # disables all the APIs deprecated before Qt 6.0.0
|
||||
|
||||
SOURCES += \
|
||||
main.cpp \
|
||||
mainwindow.cpp
|
||||
|
||||
HEADERS += \
|
||||
mainwindow.h
|
||||
|
||||
FORMS += \
|
||||
mainwindow.ui
|
||||
|
||||
unix{
|
||||
TARGET = ../target/window
|
||||
MOC_DIR = ../tmpfiles
|
||||
RCC_DIR = ../tmpfiles
|
||||
UI_DIR = ../tmpfiles
|
||||
OBJECTS_DIR = ../tmpfiles
|
||||
}
|
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
Binary file not shown.
After Width: | Height: | Size: 26 KiB |
Binary file not shown.
|
@ -0,0 +1,7 @@
|
|||
TEMPLATE = subdirs
|
||||
SUBDIRS = googlepinyin plugin testWindow
|
||||
|
||||
CONFIG += ordered
|
||||
|
||||
testWindow.depends = plugin
|
||||
plugin.depends = googlepinyin
|
|
@ -0,0 +1,866 @@
|
|||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE QtCreatorProject>
|
||||
<!-- Written by QtCreator 4.0.1, 2020-07-31T14:42:27. -->
|
||||
<qtcreator>
|
||||
<data>
|
||||
<variable>EnvironmentId</variable>
|
||||
<value type="QByteArray">{9ebfe731-b774-4413-aeaf-2512ba22bbcf}</value>
|
||||
</data>
|
||||
<data>
|
||||
<variable>ProjectExplorer.Project.ActiveTarget</variable>
|
||||
<value type="int">0</value>
|
||||
</data>
|
||||
<data>
|
||||
<variable>ProjectExplorer.Project.EditorSettings</variable>
|
||||
<valuemap type="QVariantMap">
|
||||
<value type="bool" key="EditorConfiguration.AutoIndent">true</value>
|
||||
<value type="bool" key="EditorConfiguration.AutoSpacesForTabs">false</value>
|
||||
<value type="bool" key="EditorConfiguration.CamelCaseNavigation">true</value>
|
||||
<valuemap type="QVariantMap" key="EditorConfiguration.CodeStyle.0">
|
||||
<value type="QString" key="language">Cpp</value>
|
||||
<valuemap type="QVariantMap" key="value">
|
||||
<value type="QByteArray" key="CurrentPreferences">CppGlobal</value>
|
||||
</valuemap>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="EditorConfiguration.CodeStyle.1">
|
||||
<value type="QString" key="language">QmlJS</value>
|
||||
<valuemap type="QVariantMap" key="value">
|
||||
<value type="QByteArray" key="CurrentPreferences">QmlJSGlobal</value>
|
||||
</valuemap>
|
||||
</valuemap>
|
||||
<value type="int" key="EditorConfiguration.CodeStyle.Count">2</value>
|
||||
<value type="QByteArray" key="EditorConfiguration.Codec">UTF-8</value>
|
||||
<value type="bool" key="EditorConfiguration.ConstrainTooltips">false</value>
|
||||
<value type="int" key="EditorConfiguration.IndentSize">4</value>
|
||||
<value type="bool" key="EditorConfiguration.KeyboardTooltips">false</value>
|
||||
<value type="int" key="EditorConfiguration.MarginColumn">80</value>
|
||||
<value type="bool" key="EditorConfiguration.MouseHiding">true</value>
|
||||
<value type="bool" key="EditorConfiguration.MouseNavigation">true</value>
|
||||
<value type="int" key="EditorConfiguration.PaddingMode">1</value>
|
||||
<value type="bool" key="EditorConfiguration.ScrollWheelZooming">true</value>
|
||||
<value type="bool" key="EditorConfiguration.ShowMargin">false</value>
|
||||
<value type="int" key="EditorConfiguration.SmartBackspaceBehavior">0</value>
|
||||
<value type="bool" key="EditorConfiguration.SmartSelectionChanging">true</value>
|
||||
<value type="bool" key="EditorConfiguration.SpacesForTabs">true</value>
|
||||
<value type="int" key="EditorConfiguration.TabKeyBehavior">0</value>
|
||||
<value type="int" key="EditorConfiguration.TabSize">8</value>
|
||||
<value type="bool" key="EditorConfiguration.UseGlobal">true</value>
|
||||
<value type="int" key="EditorConfiguration.Utf8BomBehavior">1</value>
|
||||
<value type="bool" key="EditorConfiguration.addFinalNewLine">true</value>
|
||||
<value type="bool" key="EditorConfiguration.cleanIndentation">true</value>
|
||||
<value type="bool" key="EditorConfiguration.cleanWhitespace">true</value>
|
||||
<value type="bool" key="EditorConfiguration.inEntireDocument">false</value>
|
||||
</valuemap>
|
||||
</data>
|
||||
<data>
|
||||
<variable>ProjectExplorer.Project.PluginSettings</variable>
|
||||
<valuemap type="QVariantMap"/>
|
||||
</data>
|
||||
<data>
|
||||
<variable>ProjectExplorer.Project.Target.0</variable>
|
||||
<valuemap type="QVariantMap">
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Desktop Qt 5.6.1 GCC 64bit</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName">Desktop Qt 5.6.1 GCC 64bit</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">qt.56.gcc_64_kit</value>
|
||||
<value type="int" key="ProjectExplorer.Target.ActiveBuildConfiguration">0</value>
|
||||
<value type="int" key="ProjectExplorer.Target.ActiveDeployConfiguration">0</value>
|
||||
<value type="int" key="ProjectExplorer.Target.ActiveRunConfiguration">0</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.BuildConfiguration.0">
|
||||
<value type="QString" key="ProjectExplorer.BuildConfiguration.BuildDirectory">/home/han/qtinput/build-tgtsmlInputContextPlugin-Desktop_Qt_5_6_1_GCC_64bit-Debug</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.0">
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">qmake</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">QtProjectManager.QMakeBuildStep</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.LinkQmlDebuggingLibrary">true</value>
|
||||
<value type="QString" key="QtProjectManager.QMakeBuildStep.QMakeArguments"></value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.QMakeForced">false</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.SeparateDebugInfo">false</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.UseQtQuickCompiler">false</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.1">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.MakeStep</value>
|
||||
<valuelist type="QVariantList" key="Qt4ProjectManager.MakeStep.AutomaticallyAddedMakeArguments">
|
||||
<value type="QString">-w</value>
|
||||
<value type="QString">-r</value>
|
||||
</valuelist>
|
||||
<value type="bool" key="Qt4ProjectManager.MakeStep.Clean">false</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeArguments"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeCommand"></value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">2</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">构建</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Build</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.1">
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.MakeStep</value>
|
||||
<valuelist type="QVariantList" key="Qt4ProjectManager.MakeStep.AutomaticallyAddedMakeArguments">
|
||||
<value type="QString">-w</value>
|
||||
<value type="QString">-r</value>
|
||||
</valuelist>
|
||||
<value type="bool" key="Qt4ProjectManager.MakeStep.Clean">true</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeArguments">clean</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeCommand"></value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">1</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">清理</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Clean</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildConfiguration.BuildStepListCount">2</value>
|
||||
<value type="bool" key="ProjectExplorer.BuildConfiguration.ClearSystemEnvironment">false</value>
|
||||
<valuelist type="QVariantList" key="ProjectExplorer.BuildConfiguration.UserEnvironmentChanges"/>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Debug</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.Qt4BuildConfiguration</value>
|
||||
<value type="int" key="Qt4ProjectManager.Qt4BuildConfiguration.BuildConfiguration">2</value>
|
||||
<value type="bool" key="Qt4ProjectManager.Qt4BuildConfiguration.UseShadowBuild">true</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.BuildConfiguration.1">
|
||||
<value type="QString" key="ProjectExplorer.BuildConfiguration.BuildDirectory">/home/han/qtinput/build-tgtsmlInputContextPlugin-Desktop_Qt_5_6_1_GCC_64bit-Release</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.0">
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">qmake</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">QtProjectManager.QMakeBuildStep</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.LinkQmlDebuggingLibrary">false</value>
|
||||
<value type="QString" key="QtProjectManager.QMakeBuildStep.QMakeArguments"></value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.QMakeForced">false</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.SeparateDebugInfo">false</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.UseQtQuickCompiler">false</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.1">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.MakeStep</value>
|
||||
<valuelist type="QVariantList" key="Qt4ProjectManager.MakeStep.AutomaticallyAddedMakeArguments">
|
||||
<value type="QString">-w</value>
|
||||
<value type="QString">-r</value>
|
||||
</valuelist>
|
||||
<value type="bool" key="Qt4ProjectManager.MakeStep.Clean">false</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeArguments"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeCommand"></value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">2</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">构建</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Build</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.1">
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.MakeStep</value>
|
||||
<valuelist type="QVariantList" key="Qt4ProjectManager.MakeStep.AutomaticallyAddedMakeArguments">
|
||||
<value type="QString">-w</value>
|
||||
<value type="QString">-r</value>
|
||||
</valuelist>
|
||||
<value type="bool" key="Qt4ProjectManager.MakeStep.Clean">true</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeArguments">clean</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeCommand"></value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">1</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">清理</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Clean</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildConfiguration.BuildStepListCount">2</value>
|
||||
<value type="bool" key="ProjectExplorer.BuildConfiguration.ClearSystemEnvironment">false</value>
|
||||
<valuelist type="QVariantList" key="ProjectExplorer.BuildConfiguration.UserEnvironmentChanges"/>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Release</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.Qt4BuildConfiguration</value>
|
||||
<value type="int" key="Qt4ProjectManager.Qt4BuildConfiguration.BuildConfiguration">0</value>
|
||||
<value type="bool" key="Qt4ProjectManager.Qt4BuildConfiguration.UseShadowBuild">true</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.BuildConfiguration.2">
|
||||
<value type="QString" key="ProjectExplorer.BuildConfiguration.BuildDirectory">/home/han/qtinput/build-tgtsmlInputContextPlugin-Desktop_Qt_5_6_1_GCC_64bit-Profile</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.0">
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">qmake</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">QtProjectManager.QMakeBuildStep</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.LinkQmlDebuggingLibrary">true</value>
|
||||
<value type="QString" key="QtProjectManager.QMakeBuildStep.QMakeArguments"></value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.QMakeForced">false</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.SeparateDebugInfo">true</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.UseQtQuickCompiler">false</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.1">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.MakeStep</value>
|
||||
<valuelist type="QVariantList" key="Qt4ProjectManager.MakeStep.AutomaticallyAddedMakeArguments">
|
||||
<value type="QString">-w</value>
|
||||
<value type="QString">-r</value>
|
||||
</valuelist>
|
||||
<value type="bool" key="Qt4ProjectManager.MakeStep.Clean">false</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeArguments"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeCommand"></value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">2</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">构建</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Build</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.1">
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.MakeStep</value>
|
||||
<valuelist type="QVariantList" key="Qt4ProjectManager.MakeStep.AutomaticallyAddedMakeArguments">
|
||||
<value type="QString">-w</value>
|
||||
<value type="QString">-r</value>
|
||||
</valuelist>
|
||||
<value type="bool" key="Qt4ProjectManager.MakeStep.Clean">true</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeArguments">clean</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeCommand"></value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">1</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">清理</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Clean</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildConfiguration.BuildStepListCount">2</value>
|
||||
<value type="bool" key="ProjectExplorer.BuildConfiguration.ClearSystemEnvironment">false</value>
|
||||
<valuelist type="QVariantList" key="ProjectExplorer.BuildConfiguration.UserEnvironmentChanges"/>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Profile</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.Qt4BuildConfiguration</value>
|
||||
<value type="int" key="Qt4ProjectManager.Qt4BuildConfiguration.BuildConfiguration">0</value>
|
||||
<value type="bool" key="Qt4ProjectManager.Qt4BuildConfiguration.UseShadowBuild">true</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.Target.BuildConfigurationCount">3</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.DeployConfiguration.0">
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.0">
|
||||
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">0</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">部署</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Deploy</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildConfiguration.BuildStepListCount">1</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">在本地部署</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.DefaultDeployConfiguration</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.Target.DeployConfigurationCount">1</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.PluginSettings"/>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.RunConfiguration.0">
|
||||
<value type="bool" key="Analyzer.QmlProfiler.AggregateTraces">false</value>
|
||||
<value type="bool" key="Analyzer.QmlProfiler.FlushEnabled">false</value>
|
||||
<value type="uint" key="Analyzer.QmlProfiler.FlushInterval">1000</value>
|
||||
<value type="QString" key="Analyzer.QmlProfiler.LastTraceFile"></value>
|
||||
<value type="bool" key="Analyzer.QmlProfiler.Settings.UseGlobalSettings">true</value>
|
||||
<valuelist type="QVariantList" key="Analyzer.Valgrind.AddedSuppressionFiles"/>
|
||||
<value type="bool" key="Analyzer.Valgrind.Callgrind.CollectBusEvents">false</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.Callgrind.CollectSystime">false</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.Callgrind.EnableBranchSim">false</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.Callgrind.EnableCacheSim">false</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.Callgrind.EnableEventToolTips">true</value>
|
||||
<value type="double" key="Analyzer.Valgrind.Callgrind.MinimumCostRatio">0.01</value>
|
||||
<value type="double" key="Analyzer.Valgrind.Callgrind.VisualisationMinimumCostRatio">10</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.FilterExternalIssues">true</value>
|
||||
<value type="int" key="Analyzer.Valgrind.LeakCheckOnFinish">1</value>
|
||||
<value type="int" key="Analyzer.Valgrind.NumCallers">25</value>
|
||||
<valuelist type="QVariantList" key="Analyzer.Valgrind.RemovedSuppressionFiles"/>
|
||||
<value type="int" key="Analyzer.Valgrind.SelfModifyingCodeDetection">1</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.Settings.UseGlobalSettings">true</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.ShowReachable">false</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.TrackOrigins">true</value>
|
||||
<value type="QString" key="Analyzer.Valgrind.ValgrindExecutable">valgrind</value>
|
||||
<valuelist type="QVariantList" key="Analyzer.Valgrind.VisibleErrorKinds">
|
||||
<value type="int">0</value>
|
||||
<value type="int">1</value>
|
||||
<value type="int">2</value>
|
||||
<value type="int">3</value>
|
||||
<value type="int">4</value>
|
||||
<value type="int">5</value>
|
||||
<value type="int">6</value>
|
||||
<value type="int">7</value>
|
||||
<value type="int">8</value>
|
||||
<value type="int">9</value>
|
||||
<value type="int">10</value>
|
||||
<value type="int">11</value>
|
||||
<value type="int">12</value>
|
||||
<value type="int">13</value>
|
||||
<value type="int">14</value>
|
||||
</valuelist>
|
||||
<value type="int" key="PE.EnvironmentAspect.Base">2</value>
|
||||
<valuelist type="QVariantList" key="PE.EnvironmentAspect.Changes"/>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">testWindow</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.Qt4RunConfiguration:/home/han/qtinput/QtInputMethod_GooglePinyin/testWindow/testWindow.pro</value>
|
||||
<value type="bool" key="QmakeProjectManager.QmakeRunConfiguration.UseLibrarySearchPath">true</value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.CommandLineArguments"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.ProFile">testWindow/testWindow.pro</value>
|
||||
<value type="bool" key="Qt4ProjectManager.Qt4RunConfiguration.UseDyldImageSuffix">false</value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory.default">/../target</value>
|
||||
<value type="uint" key="RunConfiguration.QmlDebugServerPort">3768</value>
|
||||
<value type="bool" key="RunConfiguration.UseCppDebugger">false</value>
|
||||
<value type="bool" key="RunConfiguration.UseCppDebuggerAuto">true</value>
|
||||
<value type="bool" key="RunConfiguration.UseMultiProcess">false</value>
|
||||
<value type="bool" key="RunConfiguration.UseQmlDebugger">false</value>
|
||||
<value type="bool" key="RunConfiguration.UseQmlDebuggerAuto">true</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.Target.RunConfigurationCount">1</value>
|
||||
</valuemap>
|
||||
</data>
|
||||
<data>
|
||||
<variable>ProjectExplorer.Project.Target.1</variable>
|
||||
<valuemap type="QVariantMap">
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">imx6ull</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName">imx6ull</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">{76d84a76-b517-431b-8e29-c381d34bd8b9}</value>
|
||||
<value type="int" key="ProjectExplorer.Target.ActiveBuildConfiguration">0</value>
|
||||
<value type="int" key="ProjectExplorer.Target.ActiveDeployConfiguration">0</value>
|
||||
<value type="int" key="ProjectExplorer.Target.ActiveRunConfiguration">0</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.BuildConfiguration.0">
|
||||
<value type="QString" key="ProjectExplorer.BuildConfiguration.BuildDirectory">/home/han/qtinput/build-tgtsmlInputContextPlugin-imx6ull-Debug</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.0">
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">qmake</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">QtProjectManager.QMakeBuildStep</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.LinkQmlDebuggingLibrary">true</value>
|
||||
<value type="QString" key="QtProjectManager.QMakeBuildStep.QMakeArguments"></value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.QMakeForced">false</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.SeparateDebugInfo">false</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.UseQtQuickCompiler">false</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.1">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.MakeStep</value>
|
||||
<valuelist type="QVariantList" key="Qt4ProjectManager.MakeStep.AutomaticallyAddedMakeArguments">
|
||||
<value type="QString">-w</value>
|
||||
<value type="QString">-r</value>
|
||||
</valuelist>
|
||||
<value type="bool" key="Qt4ProjectManager.MakeStep.Clean">false</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeArguments"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeCommand"></value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">2</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">构建</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Build</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.1">
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.MakeStep</value>
|
||||
<valuelist type="QVariantList" key="Qt4ProjectManager.MakeStep.AutomaticallyAddedMakeArguments">
|
||||
<value type="QString">-w</value>
|
||||
<value type="QString">-r</value>
|
||||
</valuelist>
|
||||
<value type="bool" key="Qt4ProjectManager.MakeStep.Clean">true</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeArguments">clean</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeCommand"></value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">1</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">清理</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Clean</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildConfiguration.BuildStepListCount">2</value>
|
||||
<value type="bool" key="ProjectExplorer.BuildConfiguration.ClearSystemEnvironment">false</value>
|
||||
<valuelist type="QVariantList" key="ProjectExplorer.BuildConfiguration.UserEnvironmentChanges"/>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Debug</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.Qt4BuildConfiguration</value>
|
||||
<value type="int" key="Qt4ProjectManager.Qt4BuildConfiguration.BuildConfiguration">2</value>
|
||||
<value type="bool" key="Qt4ProjectManager.Qt4BuildConfiguration.UseShadowBuild">true</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.BuildConfiguration.1">
|
||||
<value type="QString" key="ProjectExplorer.BuildConfiguration.BuildDirectory">/home/han/qtinput/build-tgtsmlInputContextPlugin-imx6ull-Release</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.0">
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">qmake</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">QtProjectManager.QMakeBuildStep</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.LinkQmlDebuggingLibrary">false</value>
|
||||
<value type="QString" key="QtProjectManager.QMakeBuildStep.QMakeArguments"></value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.QMakeForced">false</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.SeparateDebugInfo">false</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.UseQtQuickCompiler">false</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.1">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.MakeStep</value>
|
||||
<valuelist type="QVariantList" key="Qt4ProjectManager.MakeStep.AutomaticallyAddedMakeArguments">
|
||||
<value type="QString">-w</value>
|
||||
<value type="QString">-r</value>
|
||||
</valuelist>
|
||||
<value type="bool" key="Qt4ProjectManager.MakeStep.Clean">false</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeArguments"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeCommand"></value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">2</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">构建</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Build</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.1">
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.MakeStep</value>
|
||||
<valuelist type="QVariantList" key="Qt4ProjectManager.MakeStep.AutomaticallyAddedMakeArguments">
|
||||
<value type="QString">-w</value>
|
||||
<value type="QString">-r</value>
|
||||
</valuelist>
|
||||
<value type="bool" key="Qt4ProjectManager.MakeStep.Clean">true</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeArguments">clean</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeCommand"></value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">1</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">清理</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Clean</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildConfiguration.BuildStepListCount">2</value>
|
||||
<value type="bool" key="ProjectExplorer.BuildConfiguration.ClearSystemEnvironment">false</value>
|
||||
<valuelist type="QVariantList" key="ProjectExplorer.BuildConfiguration.UserEnvironmentChanges"/>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Release</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.Qt4BuildConfiguration</value>
|
||||
<value type="int" key="Qt4ProjectManager.Qt4BuildConfiguration.BuildConfiguration">0</value>
|
||||
<value type="bool" key="Qt4ProjectManager.Qt4BuildConfiguration.UseShadowBuild">true</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.BuildConfiguration.2">
|
||||
<value type="QString" key="ProjectExplorer.BuildConfiguration.BuildDirectory">/home/han/qtinput/build-tgtsmlInputContextPlugin-imx6ull-Profile</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.0">
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">qmake</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">QtProjectManager.QMakeBuildStep</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.LinkQmlDebuggingLibrary">true</value>
|
||||
<value type="QString" key="QtProjectManager.QMakeBuildStep.QMakeArguments"></value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.QMakeForced">false</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.SeparateDebugInfo">true</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.UseQtQuickCompiler">false</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.1">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.MakeStep</value>
|
||||
<valuelist type="QVariantList" key="Qt4ProjectManager.MakeStep.AutomaticallyAddedMakeArguments">
|
||||
<value type="QString">-w</value>
|
||||
<value type="QString">-r</value>
|
||||
</valuelist>
|
||||
<value type="bool" key="Qt4ProjectManager.MakeStep.Clean">false</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeArguments"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeCommand"></value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">2</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">构建</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Build</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.1">
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.MakeStep</value>
|
||||
<valuelist type="QVariantList" key="Qt4ProjectManager.MakeStep.AutomaticallyAddedMakeArguments">
|
||||
<value type="QString">-w</value>
|
||||
<value type="QString">-r</value>
|
||||
</valuelist>
|
||||
<value type="bool" key="Qt4ProjectManager.MakeStep.Clean">true</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeArguments">clean</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeCommand"></value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">1</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">清理</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Clean</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildConfiguration.BuildStepListCount">2</value>
|
||||
<value type="bool" key="ProjectExplorer.BuildConfiguration.ClearSystemEnvironment">false</value>
|
||||
<valuelist type="QVariantList" key="ProjectExplorer.BuildConfiguration.UserEnvironmentChanges"/>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Profile</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.Qt4BuildConfiguration</value>
|
||||
<value type="int" key="Qt4ProjectManager.Qt4BuildConfiguration.BuildConfiguration">0</value>
|
||||
<value type="bool" key="Qt4ProjectManager.Qt4BuildConfiguration.UseShadowBuild">true</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.Target.BuildConfigurationCount">3</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.DeployConfiguration.0">
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.0">
|
||||
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">0</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">部署</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Deploy</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildConfiguration.BuildStepListCount">1</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">在本地部署</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.DefaultDeployConfiguration</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.Target.DeployConfigurationCount">1</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.PluginSettings"/>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.RunConfiguration.0">
|
||||
<value type="bool" key="Analyzer.QmlProfiler.AggregateTraces">false</value>
|
||||
<value type="bool" key="Analyzer.QmlProfiler.FlushEnabled">false</value>
|
||||
<value type="uint" key="Analyzer.QmlProfiler.FlushInterval">1000</value>
|
||||
<value type="QString" key="Analyzer.QmlProfiler.LastTraceFile"></value>
|
||||
<value type="bool" key="Analyzer.QmlProfiler.Settings.UseGlobalSettings">true</value>
|
||||
<valuelist type="QVariantList" key="Analyzer.Valgrind.AddedSuppressionFiles"/>
|
||||
<value type="bool" key="Analyzer.Valgrind.Callgrind.CollectBusEvents">false</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.Callgrind.CollectSystime">false</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.Callgrind.EnableBranchSim">false</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.Callgrind.EnableCacheSim">false</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.Callgrind.EnableEventToolTips">true</value>
|
||||
<value type="double" key="Analyzer.Valgrind.Callgrind.MinimumCostRatio">0.01</value>
|
||||
<value type="double" key="Analyzer.Valgrind.Callgrind.VisualisationMinimumCostRatio">10</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.FilterExternalIssues">true</value>
|
||||
<value type="int" key="Analyzer.Valgrind.LeakCheckOnFinish">1</value>
|
||||
<value type="int" key="Analyzer.Valgrind.NumCallers">25</value>
|
||||
<valuelist type="QVariantList" key="Analyzer.Valgrind.RemovedSuppressionFiles"/>
|
||||
<value type="int" key="Analyzer.Valgrind.SelfModifyingCodeDetection">1</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.Settings.UseGlobalSettings">true</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.ShowReachable">false</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.TrackOrigins">true</value>
|
||||
<value type="QString" key="Analyzer.Valgrind.ValgrindExecutable">valgrind</value>
|
||||
<valuelist type="QVariantList" key="Analyzer.Valgrind.VisibleErrorKinds">
|
||||
<value type="int">0</value>
|
||||
<value type="int">1</value>
|
||||
<value type="int">2</value>
|
||||
<value type="int">3</value>
|
||||
<value type="int">4</value>
|
||||
<value type="int">5</value>
|
||||
<value type="int">6</value>
|
||||
<value type="int">7</value>
|
||||
<value type="int">8</value>
|
||||
<value type="int">9</value>
|
||||
<value type="int">10</value>
|
||||
<value type="int">11</value>
|
||||
<value type="int">12</value>
|
||||
<value type="int">13</value>
|
||||
<value type="int">14</value>
|
||||
</valuelist>
|
||||
<value type="int" key="PE.EnvironmentAspect.Base">-1</value>
|
||||
<valuelist type="QVariantList" key="PE.EnvironmentAspect.Changes"/>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">testWindow</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.Qt4RunConfiguration:/home/han/qtinput/QtInputMethod_GooglePinyin/testWindow/testWindow.pro</value>
|
||||
<value type="bool" key="QmakeProjectManager.QmakeRunConfiguration.UseLibrarySearchPath">true</value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.CommandLineArguments"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.ProFile">testWindow/testWindow.pro</value>
|
||||
<value type="bool" key="Qt4ProjectManager.Qt4RunConfiguration.UseDyldImageSuffix">false</value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory.default"></value>
|
||||
<value type="uint" key="RunConfiguration.QmlDebugServerPort">3768</value>
|
||||
<value type="bool" key="RunConfiguration.UseCppDebugger">false</value>
|
||||
<value type="bool" key="RunConfiguration.UseCppDebuggerAuto">true</value>
|
||||
<value type="bool" key="RunConfiguration.UseMultiProcess">false</value>
|
||||
<value type="bool" key="RunConfiguration.UseQmlDebugger">false</value>
|
||||
<value type="bool" key="RunConfiguration.UseQmlDebuggerAuto">true</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.Target.RunConfigurationCount">1</value>
|
||||
</valuemap>
|
||||
</data>
|
||||
<data>
|
||||
<variable>ProjectExplorer.Project.Target.2</variable>
|
||||
<valuemap type="QVariantMap">
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">ARM-A64</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName">ARM-A64</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">{8e99dd39-dea9-494c-aff4-641bf74d0113}</value>
|
||||
<value type="int" key="ProjectExplorer.Target.ActiveBuildConfiguration">0</value>
|
||||
<value type="int" key="ProjectExplorer.Target.ActiveDeployConfiguration">0</value>
|
||||
<value type="int" key="ProjectExplorer.Target.ActiveRunConfiguration">0</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.BuildConfiguration.0">
|
||||
<value type="QString" key="ProjectExplorer.BuildConfiguration.BuildDirectory">/home/han/qtinput/build-tgtsmlInputContextPlugin-ARM_A64-Debug</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.0">
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">qmake</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">QtProjectManager.QMakeBuildStep</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.LinkQmlDebuggingLibrary">true</value>
|
||||
<value type="QString" key="QtProjectManager.QMakeBuildStep.QMakeArguments"></value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.QMakeForced">false</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.SeparateDebugInfo">false</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.UseQtQuickCompiler">false</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.1">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.MakeStep</value>
|
||||
<valuelist type="QVariantList" key="Qt4ProjectManager.MakeStep.AutomaticallyAddedMakeArguments">
|
||||
<value type="QString">-w</value>
|
||||
<value type="QString">-r</value>
|
||||
</valuelist>
|
||||
<value type="bool" key="Qt4ProjectManager.MakeStep.Clean">false</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeArguments"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeCommand"></value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">2</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">构建</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Build</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.1">
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.MakeStep</value>
|
||||
<valuelist type="QVariantList" key="Qt4ProjectManager.MakeStep.AutomaticallyAddedMakeArguments">
|
||||
<value type="QString">-w</value>
|
||||
<value type="QString">-r</value>
|
||||
</valuelist>
|
||||
<value type="bool" key="Qt4ProjectManager.MakeStep.Clean">true</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeArguments">clean</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeCommand"></value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">1</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">清理</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Clean</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildConfiguration.BuildStepListCount">2</value>
|
||||
<value type="bool" key="ProjectExplorer.BuildConfiguration.ClearSystemEnvironment">false</value>
|
||||
<valuelist type="QVariantList" key="ProjectExplorer.BuildConfiguration.UserEnvironmentChanges"/>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Debug</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.Qt4BuildConfiguration</value>
|
||||
<value type="int" key="Qt4ProjectManager.Qt4BuildConfiguration.BuildConfiguration">2</value>
|
||||
<value type="bool" key="Qt4ProjectManager.Qt4BuildConfiguration.UseShadowBuild">true</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.BuildConfiguration.1">
|
||||
<value type="QString" key="ProjectExplorer.BuildConfiguration.BuildDirectory">/home/han/qtinput/build-tgtsmlInputContextPlugin-ARM_A64-Release</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.0">
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">qmake</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">QtProjectManager.QMakeBuildStep</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.LinkQmlDebuggingLibrary">false</value>
|
||||
<value type="QString" key="QtProjectManager.QMakeBuildStep.QMakeArguments"></value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.QMakeForced">false</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.SeparateDebugInfo">false</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.UseQtQuickCompiler">false</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.1">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.MakeStep</value>
|
||||
<valuelist type="QVariantList" key="Qt4ProjectManager.MakeStep.AutomaticallyAddedMakeArguments">
|
||||
<value type="QString">-w</value>
|
||||
<value type="QString">-r</value>
|
||||
</valuelist>
|
||||
<value type="bool" key="Qt4ProjectManager.MakeStep.Clean">false</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeArguments"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeCommand"></value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">2</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">构建</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Build</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.1">
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.MakeStep</value>
|
||||
<valuelist type="QVariantList" key="Qt4ProjectManager.MakeStep.AutomaticallyAddedMakeArguments">
|
||||
<value type="QString">-w</value>
|
||||
<value type="QString">-r</value>
|
||||
</valuelist>
|
||||
<value type="bool" key="Qt4ProjectManager.MakeStep.Clean">true</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeArguments">clean</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeCommand"></value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">1</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">清理</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Clean</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildConfiguration.BuildStepListCount">2</value>
|
||||
<value type="bool" key="ProjectExplorer.BuildConfiguration.ClearSystemEnvironment">false</value>
|
||||
<valuelist type="QVariantList" key="ProjectExplorer.BuildConfiguration.UserEnvironmentChanges"/>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Release</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.Qt4BuildConfiguration</value>
|
||||
<value type="int" key="Qt4ProjectManager.Qt4BuildConfiguration.BuildConfiguration">0</value>
|
||||
<value type="bool" key="Qt4ProjectManager.Qt4BuildConfiguration.UseShadowBuild">true</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.BuildConfiguration.2">
|
||||
<value type="QString" key="ProjectExplorer.BuildConfiguration.BuildDirectory">/home/han/qtinput/build-tgtsmlInputContextPlugin-ARM_A64-Profile</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.0">
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">qmake</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">QtProjectManager.QMakeBuildStep</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.LinkQmlDebuggingLibrary">true</value>
|
||||
<value type="QString" key="QtProjectManager.QMakeBuildStep.QMakeArguments"></value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.QMakeForced">false</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.SeparateDebugInfo">true</value>
|
||||
<value type="bool" key="QtProjectManager.QMakeBuildStep.UseQtQuickCompiler">false</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.1">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.MakeStep</value>
|
||||
<valuelist type="QVariantList" key="Qt4ProjectManager.MakeStep.AutomaticallyAddedMakeArguments">
|
||||
<value type="QString">-w</value>
|
||||
<value type="QString">-r</value>
|
||||
</valuelist>
|
||||
<value type="bool" key="Qt4ProjectManager.MakeStep.Clean">false</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeArguments"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeCommand"></value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">2</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">构建</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Build</value>
|
||||
</valuemap>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.1">
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildStepList.Step.0">
|
||||
<value type="bool" key="ProjectExplorer.BuildStep.Enabled">true</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Make</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.MakeStep</value>
|
||||
<valuelist type="QVariantList" key="Qt4ProjectManager.MakeStep.AutomaticallyAddedMakeArguments">
|
||||
<value type="QString">-w</value>
|
||||
<value type="QString">-r</value>
|
||||
</valuelist>
|
||||
<value type="bool" key="Qt4ProjectManager.MakeStep.Clean">true</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeArguments">clean</value>
|
||||
<value type="QString" key="Qt4ProjectManager.MakeStep.MakeCommand"></value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">1</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">清理</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Clean</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildConfiguration.BuildStepListCount">2</value>
|
||||
<value type="bool" key="ProjectExplorer.BuildConfiguration.ClearSystemEnvironment">false</value>
|
||||
<valuelist type="QVariantList" key="ProjectExplorer.BuildConfiguration.UserEnvironmentChanges"/>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">Profile</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.Qt4BuildConfiguration</value>
|
||||
<value type="int" key="Qt4ProjectManager.Qt4BuildConfiguration.BuildConfiguration">0</value>
|
||||
<value type="bool" key="Qt4ProjectManager.Qt4BuildConfiguration.UseShadowBuild">true</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.Target.BuildConfigurationCount">3</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.DeployConfiguration.0">
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.BuildConfiguration.BuildStepList.0">
|
||||
<value type="int" key="ProjectExplorer.BuildStepList.StepsCount">0</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">部署</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.BuildSteps.Deploy</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.BuildConfiguration.BuildStepListCount">1</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">在本地部署</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">ProjectExplorer.DefaultDeployConfiguration</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.Target.DeployConfigurationCount">1</value>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.PluginSettings"/>
|
||||
<valuemap type="QVariantMap" key="ProjectExplorer.Target.RunConfiguration.0">
|
||||
<value type="bool" key="Analyzer.QmlProfiler.AggregateTraces">false</value>
|
||||
<value type="bool" key="Analyzer.QmlProfiler.FlushEnabled">false</value>
|
||||
<value type="uint" key="Analyzer.QmlProfiler.FlushInterval">1000</value>
|
||||
<value type="QString" key="Analyzer.QmlProfiler.LastTraceFile"></value>
|
||||
<value type="bool" key="Analyzer.QmlProfiler.Settings.UseGlobalSettings">true</value>
|
||||
<valuelist type="QVariantList" key="Analyzer.Valgrind.AddedSuppressionFiles"/>
|
||||
<value type="bool" key="Analyzer.Valgrind.Callgrind.CollectBusEvents">false</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.Callgrind.CollectSystime">false</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.Callgrind.EnableBranchSim">false</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.Callgrind.EnableCacheSim">false</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.Callgrind.EnableEventToolTips">true</value>
|
||||
<value type="double" key="Analyzer.Valgrind.Callgrind.MinimumCostRatio">0.01</value>
|
||||
<value type="double" key="Analyzer.Valgrind.Callgrind.VisualisationMinimumCostRatio">10</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.FilterExternalIssues">true</value>
|
||||
<value type="int" key="Analyzer.Valgrind.LeakCheckOnFinish">1</value>
|
||||
<value type="int" key="Analyzer.Valgrind.NumCallers">25</value>
|
||||
<valuelist type="QVariantList" key="Analyzer.Valgrind.RemovedSuppressionFiles"/>
|
||||
<value type="int" key="Analyzer.Valgrind.SelfModifyingCodeDetection">1</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.Settings.UseGlobalSettings">true</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.ShowReachable">false</value>
|
||||
<value type="bool" key="Analyzer.Valgrind.TrackOrigins">true</value>
|
||||
<value type="QString" key="Analyzer.Valgrind.ValgrindExecutable">valgrind</value>
|
||||
<valuelist type="QVariantList" key="Analyzer.Valgrind.VisibleErrorKinds">
|
||||
<value type="int">0</value>
|
||||
<value type="int">1</value>
|
||||
<value type="int">2</value>
|
||||
<value type="int">3</value>
|
||||
<value type="int">4</value>
|
||||
<value type="int">5</value>
|
||||
<value type="int">6</value>
|
||||
<value type="int">7</value>
|
||||
<value type="int">8</value>
|
||||
<value type="int">9</value>
|
||||
<value type="int">10</value>
|
||||
<value type="int">11</value>
|
||||
<value type="int">12</value>
|
||||
<value type="int">13</value>
|
||||
<value type="int">14</value>
|
||||
</valuelist>
|
||||
<value type="int" key="PE.EnvironmentAspect.Base">-1</value>
|
||||
<valuelist type="QVariantList" key="PE.EnvironmentAspect.Changes"/>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DefaultDisplayName">testWindow</value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.DisplayName"></value>
|
||||
<value type="QString" key="ProjectExplorer.ProjectConfiguration.Id">Qt4ProjectManager.Qt4RunConfiguration:/home/han/qtinput/QtInputMethod_GooglePinyin/testWindow/testWindow.pro</value>
|
||||
<value type="bool" key="QmakeProjectManager.QmakeRunConfiguration.UseLibrarySearchPath">true</value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.CommandLineArguments"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.ProFile">testWindow/testWindow.pro</value>
|
||||
<value type="bool" key="Qt4ProjectManager.Qt4RunConfiguration.UseDyldImageSuffix">false</value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory"></value>
|
||||
<value type="QString" key="Qt4ProjectManager.Qt4RunConfiguration.UserWorkingDirectory.default"></value>
|
||||
<value type="uint" key="RunConfiguration.QmlDebugServerPort">3768</value>
|
||||
<value type="bool" key="RunConfiguration.UseCppDebugger">false</value>
|
||||
<value type="bool" key="RunConfiguration.UseCppDebuggerAuto">true</value>
|
||||
<value type="bool" key="RunConfiguration.UseMultiProcess">false</value>
|
||||
<value type="bool" key="RunConfiguration.UseQmlDebugger">false</value>
|
||||
<value type="bool" key="RunConfiguration.UseQmlDebuggerAuto">true</value>
|
||||
</valuemap>
|
||||
<value type="int" key="ProjectExplorer.Target.RunConfigurationCount">1</value>
|
||||
</valuemap>
|
||||
</data>
|
||||
<data>
|
||||
<variable>ProjectExplorer.Project.TargetCount</variable>
|
||||
<value type="int">3</value>
|
||||
</data>
|
||||
<data>
|
||||
<variable>ProjectExplorer.Project.Updater.FileVersion</variable>
|
||||
<value type="int">18</value>
|
||||
</data>
|
||||
<data>
|
||||
<variable>Version</variable>
|
||||
<value type="int">18</value>
|
||||
</data>
|
||||
</qtcreator>
|
Loading…
Reference in New Issue