QtInputMethod_GooglePinyin/googlepinyin/matrixsearch.h

/*
 * Copyright (C) 2009 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef PINYINIME_ANDPY_INCLUDE_MATRIXSEARCH_H__
#define PINYINIME_ANDPY_INCLUDE_MATRIXSEARCH_H__

#include <stdlib.h>
#include "./atomdictbase.h"
#include "./dicttrie.h"
#include "./searchutility.h"
#include "./spellingtrie.h"
#include "./splparser.h"

namespace ime_pinyin {

// Capacity, in rows/steps, used to size the fixed arrays declared in this
// header (pys_, lma_start_, lma_id_, spl_start_, spl_id_, and the arrays in
// ComposingPhrase). It is an alias of kMaxSearchSteps, which is declared
// outside this file (presumably in one of the headers included above --
// TODO confirm).
static const size_t kMaxRowNum = kMaxSearchSteps;

// A dictionary match info (DMI) node. It records the dictionary handles, the
// spelling id and some flags for one match, and refers back to the DMI node it
// was extended from through dmi_fr.
typedef struct {
  // MileStoneHandle objects for the system and user dictionaries.
  MileStoneHandle dict_handles[2];
  // From which DMI node. -1 means it's from root.
  PoolPosType dmi_fr;
  // The spelling id for the Pinyin string from the previous DMI to this node.
  // If it is a half id like Shengmu, the node pointed by dict_node is the first
  // node with this Shengmu.
  uint16 spl_id;
  // What's the level of the dict node. Level of root is 0, but root is never
  // recorded by dict_node. (7-bit field.)
  unsigned char dict_level:7;
  // If this node is for composing phrase, this bit is 1.
  unsigned char c_phrase:1;
  // Whether the spl_id is parsed with a split character at the end.
  unsigned char splid_end_split:1;
  // What's the length of the spelling string for this match, for the whole
  // word. (7-bit field.)
  unsigned char splstr_len:7;
  // Used to indicate whether all spelling ids from the root are full spelling
  // ids. This information is useful for keymapping mode (not finished). Because
  // in this mode there are no clear boundaries, we prefer those results which
  // have full spelling ids.
  unsigned char all_full_id:1;
} DictMatchInfo, *PDictMatchInfo;

// A node of the search matrix. MatrixRow::mtrx_nd_fixed points to one of these
// to record the choice the user selected for a step.
typedef struct MatrixNode {
  // Lemma id carried by this node.
  LemmaIdType id;
  // Score of this node. NOTE(review): the scale and ordering (whether lower or
  // higher is better) are not visible in this header -- confirm in the .cpp.
  float score;
  // Another MatrixNode this node refers to; presumably the node this one was
  // extended from -- TODO confirm.
  MatrixNode *from;
  // From which DMI node. Used to trace the spelling segmentation.
  PoolPosType dmi_fr;
  // Presumably the step (row) this node belongs to -- TODO confirm.
  uint16 step;
} MatrixNode, *PMatrixNode;

// One row (step) of the search matrix. It locates this step's MatrixNode and
// DictMatchInfo items inside their pools and remembers the user's fixed choice
// for the step, if any.
typedef struct {
  // The MatrixNode position in the matrix pool
  PoolPosType mtrx_nd_pos;
  // The DictMatchInfo position in the DictMatchInfo pool.
  PoolPosType dmi_pos;
  // Presumably the number of MatrixNode items of this row, starting at
  // mtrx_nd_pos -- TODO confirm.
  uint16 mtrx_nd_num;
  // Presumably the number of DictMatchInfo items of this row, starting at
  // dmi_pos -- TODO confirm. (15-bit field.)
  uint16 dmi_num:15;
  // Used to indicate whether there are dmi nodes in this step with full
  // spelling id. This information is used to decide whether a substring of a
  // valid Pinyin should be extended.
  //
  // Example1: shoudao
  // When the last char 'o' is added, the parser will find "dao" is a valid
  // Pinyin, and because all dmi nodes at location 'd' (including those for
  // "shoud", and those for "d") have Shengmu id only, it is not necessary
  // to extend "ao"; otherwise the result may be "shoud ao", which is not
  // reasonable.
  //
  // Example2: hengao
  // When the last 'o' is added, the parser finds "gao" is a valid Pinyin.
  // Because some dmi nodes at 'g' have Shengmu ids (hen'g and g), but some dmi
  // nodes at 'g' have full ids ('heng'), it is necessary to extend "ao", thus
  // "heng ao" can also be the result.
  //
  // Similarly, "ganga" is expanded to "gang a".
  //
  // For Pinyin string "xian", because "xian" is a valid Pinyin and all dmi
  // nodes at 'x' only have Shengmu ids, the parser will not try "x ian" (and it
  // is not valid either). If the parser uses break in the loop, the result
  // will always be "xian"; but if the parser uses continue in the loop, "xi an"
  // will also be tried. This behaviour can be set via the function
  // set_xi_an_switch().
  uint16 dmi_has_full_id:1;
  // Points to a MatrixNode of the current step to indicate which choice the
  // user selects.
  MatrixNode *mtrx_nd_fixed;
} MatrixRow, *PMatrixRow;

// When the user inputs and selects candidates, the fixed lemma ids are stored
// in lma_id_ of class MatrixSearch, and fixed_lmas_ is used to indicate how
// many lemmas from the beginning are fixed. If the user deletes Pinyin
// characters one by one from the end, these fixed lemmas can be unlocked one
// by one when necessary. Whenever the user deletes a Chinese character and its
// spelling string in these fixed lemmas, all fixed lemmas will be merged
// together into a unit named ComposingPhrase with a lemma id
// kLemmaIdComposing, and this composing phrase will be the first lemma in the
// sentence. Because it contains some modified lemmas (by deleting a
// character), these merged lemmas are called sub lemmas (sublma), and each of
// them is represented individually, so that when the user deletes Pinyin
// characters from the end, these sub lemmas can also be unlocked one by one.
typedef struct {
  // Spelling ids of the phrase; presumably one per Chinese character --
  // TODO confirm.
  uint16 spl_ids[kMaxRowNum];
  // Start positions of the spellings; presumably offsets into the Pinyin
  // string -- TODO confirm.
  uint16 spl_start[kMaxRowNum];
  char16 chn_str[kMaxRowNum];       // Chinese string.
  uint16 sublma_start[kMaxRowNum];  // Counted in Chinese characters.
  // Number of sub lemmas merged into this phrase.
  size_t sublma_num;
  uint16 length;                    // Counted in Chinese characters.
} ComposingPhrase, *TComposingPhrase;

// The Pinyin decoder. It owns pointers to the spelling trie, the system and
// user dictionaries and the spelling parser, keeps the search matrix
// (MatrixRow rows plus MatrixNode and DictMatchInfo pools), and exposes the
// operations to search a Pinyin string, delete from it, fetch and choose
// candidates, and get predictions.
class MatrixSearch {
 private:
  // If it is true, prediction list by string whose length is greater than 1
  // will be limited to a reasonable number.
  static const bool kPredictLimitGt1 = false;

  // If it is true, the engine will prefer long history based prediction,
  // for example, when user inputs "BeiJing", we prefer "DaXue", etc., which are
  // based on the two-character history.
  static const bool kPreferLongHistoryPredict = true;

  // If it is true, prediction will only be based on user dictionary. This flag
  // is for debug purpose.
  static const bool kOnlyUserDictPredict = false;

  // The maximum buffer to store LmaPsbItems.
  static const size_t kMaxLmaPsbItems = 1450;

  // How many rows for each step.
  static const size_t kMaxNodeARow = 5;

  // The maximum length of the sentence candidates counted in Chinese
  // characters.
  static const size_t kMaxSentenceLength = 16;

  // The size of the matrix node pool.
  static const size_t kMtrxNdPoolSize = 200;

  // The size of the DMI node pool.
  static const size_t kDmiPoolSize = 800;

  // Used to indicate whether this object has been initialized.
  bool inited_;

  // Spelling trie.
  const SpellingTrie *spl_trie_;

  // Used to indicate this switcher status: when "xian" is parsed, should
  // "xi an" also be extended. Default is false.
  // These cases include: xia, xian, xiang, zhuan, jiang..., etc. The string
  // should be valid for a FULL spelling, or a combination of two spellings,
  // first of which is a FULL id too. So even if it is true, "da" will never be
  // split into "d a", because "d" is not a full spelling id.
  bool xi_an_enabled_;

  // System dictionary.
  DictTrie* dict_trie_;

  // User dictionary.
  AtomDictBase* user_dict_;

  // Spelling parser.
  SpellingParser* spl_parser_;

  // The maximum allowed length of spelling string (such as a Pinyin string).
  size_t max_sps_len_;

  // The maximum allowed length of a result Chinese string.
  size_t max_hzs_len_;

  // Pinyin string. Max length: kMaxRowNum - 1
  char pys_[kMaxRowNum];

  // The length of the string that has been decoded successfully.
  size_t pys_decoded_len_;

  // Shared buffer for multiple purposes.
  size_t *share_buf_;

  // Pool of MatrixNode items (see kMtrxNdPoolSize).
  MatrixNode *mtrx_nd_pool_;
  PoolPosType mtrx_nd_pool_used_;    // How many nodes used in the pool
  // Pool of DictMatchInfo items (see kDmiPoolSize).
  DictMatchInfo *dmi_pool_;
  PoolPosType dmi_pool_used_;        // How many items used in the pool

  MatrixRow *matrix_;                // The first row is for starting

  DictExtPara *dep_;                 // Parameter used to extend DMI nodes.

  NPredictItem *npre_items_;         // Used to do prediction
  // Presumably the capacity of npre_items_ -- TODO confirm.
  size_t npre_items_len_;

  // The starting positions and lemma ids for the full sentence candidate.
  size_t lma_id_num_;
  uint16 lma_start_[kMaxRowNum];     // Counted in spelling ids.
  LemmaIdType lma_id_[kMaxRowNum];
  // How many lemmas from the beginning of lma_id_ are fixed.
  size_t fixed_lmas_;

  // If fixed_lmas_ is bigger than i, element i is used to indicate whether
  // the i'th lemma id in lma_id_ is the first candidate for that step.
  // If all candidates are the first one for that step, the whole string can be
  // decoded by the engine automatically, so no need to add it to user
  // dictionary. (We are considering to add it to user dictionary in the
  // future).
  uint8 fixed_lmas_no1_[kMaxRowNum];

  // Composing phrase
  ComposingPhrase c_phrase_;

  // If dmi_c_phrase_ is true, the decoder will try to match the
  // composing phrase (and definitely it will match successfully). If it
  // is false, the decoder will try to match lemma items in dictionaries.
  bool dmi_c_phrase_;

  // The starting positions and spelling ids for the first full sentence
  // candidate.
  size_t spl_id_num_;                // Number of spelling ids
  uint16 spl_start_[kMaxRowNum];     // Starting positions
  uint16 spl_id_[kMaxRowNum];        // Spelling ids
  // Used to remember the last fixed position, counted in Hanzi.
  size_t fixed_hzs_;

  // Lemma Items with possibility score, two purposes:
  // 1. In Viterbi decoding, this buffer is used to get all possible candidates
  // for current step;
  // 2. When the search is done, this buffer is used to get candidates from the
  // first un-fixed step and show them to the user.
  LmaPsbItem lpi_items_[kMaxLmaPsbItems];
  size_t lpi_total_;

  // Assign the pointers with NULL. The caller makes sure that all pointers are
  // not valid before calling it. This function will only be called in the
  // constructor and free_resource().
  void reset_pointers_to_null();

  // Allocate the resources used by this object. NOTE(review): which buffers
  // are allocated and what the return value means are not visible in this
  // header -- presumably true on success; confirm in the .cpp.
  bool alloc_resource();

  // Counterpart of alloc_resource(); it also calls reset_pointers_to_null()
  // (see the comment on that function).
  void free_resource();

  // Reset the search space totally.
  bool reset_search0();

  // Reset the search space from ch_pos step. For example, if the original
  // input Pinyin is "an", reset_search(1) will reset the search space to the
  // result of "a". If the given position is out of range, return false.
  // If clear_fixed_this_step is true, and the ch_pos step is a fixed step,
  // clear its fixed status. If clear_dmi_this_step is true, clear the DMI
  // nodes. If clear_mtrx_this_step is true, clear the mtrx nodes of this step.
  // The DMI nodes will be kept.
  //
  // Note: this function should not destroy content of pys_.
  bool reset_search(size_t ch_pos, bool clear_fixed_this_step,
                    bool clear_dmi_this_step, bool clear_mtrx_this_step);

  // Delete a part of the content in pys_.
  void del_in_pys(size_t start, size_t len);

  // Delete a spelling id and its corresponding Chinese character, and merge
  // the fixed lemmas into the composing phrase.
  // del_spl_pos indicates which spelling id needs to be deleted.
  // This function will update the lemma and spelling segmentation information.
  // The caller guarantees that fixed_lmas_ > 0 and del_spl_pos is within
  // the fixed lemmas.
  void merge_fixed_lmas(size_t del_spl_pos);

  // Get spelling start positions and ids. The result will be stored in
  // spl_id_num_, spl_start_[], spl_id_[].
  // fixed_hzs_ will be also assigned.
  void get_spl_start_id();

  // Get all lemma ids which match the given spelling id stream (shorter than
  // the maximum length of a word).
  // If pfullsent is not NULL, it means the full sentence candidate may be the
  // same as the coming lemma string; if so, remove that lemma.
  // The result is sorted in descending order by the frequency score.
  size_t get_lpis(const uint16* splid_str, size_t splid_str_len,
                  LmaPsbItem* lma_buf, size_t max_lma_buf,
                  const char16 *pfullsent, bool sort_by_psb);

  // Get the string of lemma id_lemma into str_buf, which can hold str_max
  // items. NOTE(review): the meaning of the return value (presumably the
  // string length, 0 on failure) is not visible here -- confirm.
  uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max);

  // Get the spelling ids of lemma id_lemma into splids, which can hold
  // splids_max items. NOTE(review): the meaning of arg_valid and of the
  // return value is not visible in this header -- confirm in the .cpp.
  uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,
                          uint16 splids_max, bool arg_valid);


  // Extend a DMI node with a spelling id. ext_len is the length of the rows
  // to extend, actually, it is the size of the spelling string of splid.
  // Return value can be 1 or 0.
  // 1 means a new DMI is filled in (dmi_pool_used_ is the next blank DMI in
  // the pool).
  // 0 means either the dmi node can not be extended with splid, or the splid
  // is a Shengmu id, which is only used to get lpi_items, or the result node
  // in DictTrie has no son, so it is not necessary to keep the new DMI.
  //
  // This function modifies the content of lpi_items_ and lpi_total_.
  // lpi_items_ is used to get the LmaPsbItem list, lpi_total_ returns the size.
  // The function's returned value has no relation with the value of lpi_num.
  //
  // If dmi_s == NULL, this function will extend the root node of DictTrie.
  //
  // This function will not change dmi_pool_used_. Please change it after
  // calling this function if necessary.
  //
  // The caller should guarantee that NULL != dep.
  size_t extend_dmi(DictExtPara *dep, DictMatchInfo *dmi_s);

  // Extend dmi for the composing phrase.
  size_t extend_dmi_c(DictExtPara *dep, DictMatchInfo *dmi_s);

  // Extend a MatrixNode with the given LmaPsbItem list.
  // res_row is the destination row number.
  // This function does not change mtrx_nd_pool_used_. Please change it after
  // calling this function if necessary.
  // Always returns 0.
  size_t extend_mtrx_nd(MatrixNode *mtrx_nd, LmaPsbItem lpi_items[],
                        size_t lpi_num, PoolPosType dmi_fr, size_t res_row);


  // Try to find a dmi node at step_to position, and the found dmi node should
  // match the given spelling id strings.
  PoolPosType match_dmi(size_t step_to, uint16 spl_ids[], uint16 spl_id_num);

  // Add one input character ch to the search. NOTE(review): how this combines
  // prepare_add_char() and add_char_qwerty(), and what the return value
  // means, is not visible in this header -- confirm in the .cpp.
  bool add_char(char ch);
  // Save the input character ch before it is decoded (add_char_qwerty() is
  // documented as running after this, once the char has been saved).
  bool prepare_add_char(char ch);

  // Called after prepare_add_char, so the input char has been saved.
  bool add_char_qwerty();

  // Prepare candidates from the last fixed hanzi position.
  void prepare_candidates();

  // Is the character in step pos a splitter character?
  // The caller guarantees that the position is valid.
  bool is_split_at(uint16 pos);

  // Fill the DMI node *dmi from the given values; most parameters have a
  // same-named field in DictMatchInfo. NOTE(review): node_num has no
  // same-named field there and c_phrase has no parameter here -- how they are
  // handled must be confirmed in the .cpp.
  void fill_dmi(DictMatchInfo *dmi, MileStoneHandle *handles,
                PoolPosType dmi_fr,
                uint16 spl_id, uint16 node_num, unsigned char dict_level,
                bool splid_end_split, unsigned char splstr_len,
                unsigned char all_full_id);

  // Prediction helper taking the same kind of output buffer as get_predicts().
  // NOTE(review): presumably returns the number of predictions written to
  // predict_buf (at most buf_len) -- confirm in the .cpp.
  size_t inner_predict(const char16 fixed_scis_ids[], uint16 scis_num,
                       char16 predict_buf[][kMaxPredictSize + 1],
                       size_t buf_len);

  // Add the first candidate to the user dictionary.
  bool try_add_cand0_to_userdict();

  // Add a user lemma to the user dictionary. This lemma is a subset of
  // candidate 0. lma_from is from which lemma in lma_id_, lma_num is the
  // number of lemmas to be combined together as a new lemma. The caller
  // guarantees that the combined new lemma's length is less than or equal to
  // kMaxLemmaSize.
  bool add_lma_to_userdict(uint16 lma_from, uint16 lma_num, float score);

  // Update dictionary frequencies.
  void update_dict_freq();

  // Debug helper which prints the DMI node at dmi_pos. NOTE(review):
  // nest_level presumably controls the nesting/indentation of the output --
  // confirm.
  void debug_print_dmi(PoolPosType dmi_pos, uint16 nest_level);

 public:
  MatrixSearch();
  ~MatrixSearch();

  // Initialize the engine with the file names of the system dictionary and
  // the user dictionary. NOTE(review): presumably returns true on success --
  // confirm in the .cpp.
  bool init(const char *fn_sys_dict, const char *fn_usr_dict);

  // Same as init(), but the system dictionary is given by a file descriptor
  // together with start_offset and length (presumably the byte range of the
  // dictionary inside that file -- TODO confirm).
  bool init_fd(int sys_fd, long start_offset, long length,
               const char *fn_usr_dict);

  // Set up the user dictionary from the file name fn_usr_dict.
  void init_user_dictionary(const char *fn_usr_dict);

  // Whether a user dictionary is in use. NOTE(review): presumably this
  // reflects whether user_dict_ is non-NULL -- confirm.
  bool is_user_dictionary_enabled() const;

  // Set the limits stored in max_sps_len_ and max_hzs_len_ (see the comments
  // on those members).
  void set_max_lens(size_t max_sps_len, size_t max_hzs_len);

  // Close the engine. NOTE(review): presumably flushes and releases the
  // dictionaries and buffers -- confirm in the .cpp.
  void close();

  // Flush cached data. NOTE(review): presumably writes pending user
  // dictionary changes -- confirm in the .cpp.
  void flush_cache();

  // Set / get the "xi an" switch described at xi_an_enabled_.
  void set_xi_an_switch(bool xi_an_enabled);

  bool get_xi_an_switch();

  // Reset the search space. Equivalent to reset_search(0).
  // If inited, always return true.
  // NOTE(review): the private functions visible here are reset_search0() and
  // the four-argument reset_search(); confirm which one "reset_search(0)"
  // refers to.
  bool reset_search();

  // Search a Pinyin string.
  // Return value is the position successfully parsed.
  size_t search(const char *py, size_t py_len);

  // Used to delete something in the Pinyin string kept by the engine, and do
  // a re-search.
  // Return value is the new length of Pinyin string kept by the engine which
  // is parsed successfully.
  // If is_pos_in_splid is false, pos is used to indicate that pos-th Pinyin
  // character needs to be deleted. If is_pos_in_splid is true, all Pinyin
  // characters for pos-th spelling id need to be deleted.
  // If the deleted character(s) is just after a fixed lemma or sub lemma in
  // composing phrase, clear_fixed_this_step indicates whether we need to
  // unlock the last fixed lemma or sub lemma.
  // If is_pos_in_splid is false, and pos-th character is in the range for the
  // fixed lemmas or composing string, this function will do nothing and just
  // return the result of the previous search.
  size_t delsearch(size_t pos, bool is_pos_in_splid,
                   bool clear_fixed_this_step);

  // Get the number of candidates, called after search().
  size_t get_candidate_num();

  // Get the Pinyin string stored by the engine.
  // *decoded_len returns the length of the successfully decoded string.
  const char* get_pystr(size_t *decoded_len);

  // Get the spelling boundaries for the first sentence candidate.
  // Number of spellings will be returned. The number of valid elements in
  // spl_start is one more than the return value because the last one is used
  // to indicate the beginning of the next un-input spelling.
  // For a Pinyin "women", the returned value is 2, spl_start is [0, 2, 5] .
  size_t get_spl_start(const uint16 *&spl_start);

  // Get one candidate string. If full sentence candidate is available, it will
  // be the first one.
  char16* get_candidate(size_t cand_id, char16 *cand_str, size_t max_len);

  // Get the first candidate, which is a "full sentence".
  // If retstr_len is not NULL, it will be used to return the string length.
  // If only_unfixed is true, only unfixed part will be fetched.
  char16* get_candidate0(char16* cand_str, size_t max_len,
                         uint16 *retstr_len, bool only_unfixed);

  // Choose a candidate. The decoder will do a search after the fixed position.
  size_t choose(size_t cand_id);

  // Cancel the last choosing operation, and return the new number of choices.
  size_t cancel_last_choice();

  // Get the length of fixed Hanzis.
  size_t get_fixedlen();

  // Get predictions for the history string fixed_buf into predict_buf, which
  // has buf_len rows of kMaxPredictSize + 1 char16 items each.
  // NOTE(review): presumably returns the number of predictions written --
  // confirm in the .cpp.
  size_t get_predicts(const char16 fixed_buf[],
                      char16 predict_buf[][kMaxPredictSize + 1],
                      size_t buf_len);
};
}

#endif  // PINYINIME_ANDPY_INCLUDE_MATRIXSEARCH_H__
first commit 2020-07-31 06:50:46 +00:00			`/*`
			`* Copyright (C) 2009 The Android Open Source Project`
			`*`
			`* Licensed under the Apache License, Version 2.0 (the "License");`
			`* you may not use this file except in compliance with the License.`
			`* You may obtain a copy of the License at`
			`*`
			`* http://www.apache.org/licenses/LICENSE-2.0`
			`*`
			`* Unless required by applicable law or agreed to in writing, software`
			`* distributed under the License is distributed on an "AS IS" BASIS,`
			`* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.`
			`* See the License for the specific language governing permissions and`
			`* limitations under the License.`
			`*/`

			`#ifndef PINYINIME_ANDPY_INCLUDE_MATRIXSEARCH_H__`
			`#define PINYINIME_ANDPY_INCLUDE_MATRIXSEARCH_H__`

			`#include <stdlib.h>`
			`#include "./atomdictbase.h"`
			`#include "./dicttrie.h"`
			`#include "./searchutility.h"`
			`#include "./spellingtrie.h"`
			`#include "./splparser.h"`

			`namespace ime_pinyin {`

			`static const size_t kMaxRowNum = kMaxSearchSteps;`

			`typedef struct {`
			`// MileStoneHandle objects for the system and user dictionaries.`
			`MileStoneHandle dict_handles[2];`
			`// From which DMI node. -1 means it's from root.`
			`PoolPosType dmi_fr;`
			`// The spelling id for the Pinyin string from the previous DMI to this node.`
			`// If it is a half id like Shengmu, the node pointed by dict_node is the first`
			`// node with this Shengmu,`
			`uint16 spl_id;`
			`// What's the level of the dict node. Level of root is 0, but root is never`
			`// recorded by dict_node.`
			`unsigned char dict_level:7;`
			`// If this node is for composing phrase, this bit is 1.`
			`unsigned char c_phrase:1;`
			`// Whether the spl_id is parsed with a split character at the end.`
			`unsigned char splid_end_split:1;`
			`// What's the length of the spelling string for this match, for the whole`
			`// word.`
			`unsigned char splstr_len:7;`
			`// Used to indicate whether all spelling ids from the root are full spelling`
			`// ids. This information is useful for keymapping mode(not finished). Because`
			`// in this mode, there is no clear boundaries, we prefer those results which`
			`// have full spelling ids.`
			`unsigned char all_full_id:1;`
			`} DictMatchInfo, *PDictMatchInfo;`

			`typedef struct MatrixNode {`
			`LemmaIdType id;`
			`float score;`
			`MatrixNode *from;`
			`// From which DMI node. Used to trace the spelling segmentation.`
			`PoolPosType dmi_fr;`
			`uint16 step;`
			`} MatrixNode, *PMatrixNode;`

			`typedef struct {`
			`// The MatrixNode position in the matrix pool`
			`PoolPosType mtrx_nd_pos;`
			`// The DictMatchInfo position in the DictMatchInfo pool.`
			`PoolPosType dmi_pos;`
			`uint16 mtrx_nd_num;`
			`uint16 dmi_num:15;`
			`// Used to indicate whether there are dmi nodes in this step with full`
			`// spelling id. This information is used to decide whether a substring of a`
			`// valid Pinyin should be extended.`
			`//`
			`// Example1: shoudao`
			`// When the last char 'o' is added, the parser will find "dao" is a valid`
			`// Pinyin, and because all dmi nodes at location 'd' (including those for`
			`// "shoud", and those for "d") have Shengmu id only, so it is not necessary`
			`// to extend "ao", otherwise the result may be "shoud ao", that is not`
			`// reasonable.`
			`//`
			`// Example2: hengao`
			`// When the last 'o' is added, the parser finds "gao" is a valid Pinyin.`
			`// Because some dmi nodes at 'g' has Shengmu ids (hen'g and g), but some dmi`
			`// nodes at 'g' has full ids ('heng'), so it is necessary to extend "ao", thus`
			`// "heng ao" can also be the result.`
			`//`
			`// Similarly, "ganga" is expanded to "gang a".`
			`//`
			`// For Pinyin string "xian", because "xian" is a valid Pinyin, because all dmi`
			`// nodes at 'x' only have Shengmu ids, the parser will not try "x ian" (and it`
			`// is not valid either). If the parser uses break in the loop, the result`
			`// always be "xian"; but if the parser uses continue in the loop, "xi an" will`
			`// also be tried. This behaviour can be set via the function`
			`// set_xi_an_switch().`
			`uint16 dmi_has_full_id:1;`
			`// Points to a MatrixNode of the current step to indicate which choice the`
			`// user selects.`
			`MatrixNode *mtrx_nd_fixed;`
			`} MatrixRow, *PMatrixRow;`

			`// When user inputs and selects candidates, the fixed lemma ids are stored in`
			`// lma_id_ of class MatrixSearch, and fixed_lmas_ is used to indicate how many`
			`// lemmas from the beginning are fixed. If user deletes Pinyin characters one`
			`// by one from the end, these fixed lemmas can be unlocked one by one when`
			`// necessary. Whenever user deletes a Chinese character and its spelling string`
			`// in these fixed lemmas, all fixed lemmas will be merged together into a unit`
			`// named ComposingPhrase with a lemma id kLemmaIdComposing, and this composing`
			`// phrase will be the first lemma in the sentence. Because it contains some`
			`// modified lemmas (by deleting a character), these merged lemmas are called`
			`// sub lemmas (sublma), and each of them are represented individually, so that`
			`// when user deletes Pinyin characters from the end, these sub lemmas can also`
			`// be unlocked one by one.`
			`typedef struct {`
			`uint16 spl_ids[kMaxRowNum];`
			`uint16 spl_start[kMaxRowNum];`
			`char16 chn_str[kMaxRowNum]; // Chinese string.`
			`uint16 sublma_start[kMaxRowNum]; // Counted in Chinese characters.`
			`size_t sublma_num;`
			`uint16 length; // Counted in Chinese characters.`
			`} ComposingPhrase, *TComposingPhrase;`

			`class MatrixSearch {`
			`private:`
			`// If it is true, prediction list by string whose length is greater than 1`
			`// will be limited to a reasonable number.`
			`static const bool kPredictLimitGt1 = false;`

			`// If it is true, the engine will prefer long history based prediction,`
			`// for example, when user inputs "BeiJing", we prefer "DaXue", etc., which are`
			`// based on the two-character history.`
			`static const bool kPreferLongHistoryPredict = true;`

			`// If it is true, prediction will only be based on user dictionary. this flag`
			`// is for debug purpose.`
			`static const bool kOnlyUserDictPredict = false;`

			`// The maximum buffer to store LmaPsbItems.`
			`static const size_t kMaxLmaPsbItems = 1450;`

			`// How many rows for each step.`
			`static const size_t kMaxNodeARow = 5;`

			`// The maximum length of the sentence candidates counted in chinese`
			`// characters`
			`static const size_t kMaxSentenceLength = 16;`

			`// The size of the matrix node pool.`
			`static const size_t kMtrxNdPoolSize = 200;`

			`// The size of the DMI node pool.`
			`static const size_t kDmiPoolSize = 800;`

			`// Used to indicate whether this object has been initialized.`
			`bool inited_;`

			`// Spelling trie.`
			`const SpellingTrie *spl_trie_;`

			`// Used to indicate this switcher status: when "xian" is parseed, should`
			`// "xi an" also be extended. Default is false.`
			`// These cases include: xia, xian, xiang, zhuan, jiang..., etc. The string`
			`// should be valid for a FULL spelling, or a combination of two spellings,`
			`// first of which is a FULL id too. So even it is true, "da" will never be`
			`// split into "d a", because "d" is not a full spelling id.`
			`bool xi_an_enabled_;`

			`// System dictionary.`
			`DictTrie* dict_trie_;`

			`// User dictionary.`
			`AtomDictBase* user_dict_;`

			`// Spelling parser.`
			`SpellingParser* spl_parser_;`

			`// The maximum allowed length of spelling string (such as a Pinyin string).`
			`size_t max_sps_len_;`

			`// The maximum allowed length of a result Chinese string.`
			`size_t max_hzs_len_;`

			`// Pinyin string. Max length: kMaxRowNum - 1`
			`char pys_[kMaxRowNum];`

			`// The length of the string that has been decoded successfully.`
			`size_t pys_decoded_len_;`

			`// Shared buffer for multiple purposes.`
			`size_t *share_buf_;`

			`MatrixNode *mtrx_nd_pool_;`
			`PoolPosType mtrx_nd_pool_used_; // How many nodes used in the pool`
			`DictMatchInfo *dmi_pool_;`
			`PoolPosType dmi_pool_used_; // How many items used in the pool`

			`MatrixRow *matrix_; // The first row is for starting`

			`DictExtPara *dep_; // Parameter used to extend DMI nodes.`

			`NPredictItem *npre_items_; // Used to do prediction`
			`size_t npre_items_len_;`

			`// The starting positions and lemma ids for the full sentence candidate.`
			`size_t lma_id_num_;`
			`uint16 lma_start_[kMaxRowNum]; // Counted in spelling ids.`
			`LemmaIdType lma_id_[kMaxRowNum];`
			`size_t fixed_lmas_;`

			`// If fixed_lmas_ is bigger than i, Element i is used to indicate whether`
			`// the i'th lemma id in lma_id_ is the first candidate for that step.`
			`// If all candidates are the first one for that step, the whole string can be`
			`// decoded by the engine automatically, so no need to add it to user`
			`// dictionary. (We are considering to add it to user dictionary in the`
			`// future).`
			`uint8 fixed_lmas_no1_[kMaxRowNum];`

			`// Composing phrase`
			`ComposingPhrase c_phrase_;`

			`// If dmi_c_phrase_ is true, the decoder will try to match the`
			`// composing phrase (And definitely it will match successfully). If it`
			`// is false, the decoder will try to match lemmas items in dictionaries.`
			`bool dmi_c_phrase_;`

			`// The starting positions and spelling ids for the first full sentence`
			`// candidate.`
			`size_t spl_id_num_; // Number of splling ids`
			`uint16 spl_start_[kMaxRowNum]; // Starting positions`
			`uint16 spl_id_[kMaxRowNum]; // Spelling ids`
			`// Used to remember the last fixed position, counted in Hanzi.`
			`size_t fixed_hzs_;`

			`// Lemma Items with possibility score, two purposes:`
			`// 1. In Viterbi decoding, this buffer is used to get all possible candidates`
			`// for current step;`
			`// 2. When the search is done, this buffer is used to get candiates from the`
			`// first un-fixed step and show them to the user.`
			`LmaPsbItem lpi_items_[kMaxLmaPsbItems];`
			`size_t lpi_total_;`

			`// Assign the pointers with NULL. The caller makes sure that all pointers are`
			`// not valid before calling it. This function only will be called in the`
			`// construction function and free_resource().`
			`void reset_pointers_to_null();`

			`bool alloc_resource();`

			`void free_resource();`

			`// Reset the search space totally.`
			`bool reset_search0();`

			`// Reset the search space from ch_pos step. For example, if the original`
			`// input Pinyin is "an", reset_search(1) will reset the search space to the`
			`// result of "a". If the given position is out of range, return false.`
			`// if clear_fixed_this_step is true, and the ch_pos step is a fixed step,`
			`// clear its fixed status. if clear_dmi_his_step is true, clear the DMI nodes.`
			`// If clear_mtrx_this_sTep is true, clear the mtrx nodes of this step.`
			`// The DMI nodes will be kept.`
			`//`
			`// Note: this function should not destroy content of pys_.`
			`bool reset_search(size_t ch_pos, bool clear_fixed_this_step,`
			`bool clear_dmi_this_step, bool clear_mtrx_this_step);`

			`// Delete a part of the content in pys_.`
			`void del_in_pys(size_t start, size_t len);`

			`// Delete a spelling id and its corresponding Chinese character, and merge`
			`// the fixed lemmas into the composing phrase.`
			`// del_spl_pos indicates which spelling id needs to be delete.`
			`// This function will update the lemma and spelling segmentation information.`
			`// The caller guarantees that fixed_lmas_ > 0 and del_spl_pos is within`
			`// the fixed lemmas.`
			`void merge_fixed_lmas(size_t del_spl_pos);`

			`// Get spelling start posistions and ids. The result will be stored in`
			`// spl_id_num_, spl_start_[], spl_id_[].`
			`// fixed_hzs_ will be also assigned.`
			`void get_spl_start_id();`

			`// Get all lemma ids with match the given spelling id stream(shorter than the`
			`// maximum length of a word).`
			`// If pfullsent is not NULL, means the full sentence candidate may be the`
			`// same with the coming lemma string, if so, remove that lemma.`
			`// The result is sorted in descendant order by the frequency score.`
			`size_t get_lpis(const uint16* splid_str, size_t splid_str_len,`
			`LmaPsbItem* lma_buf, size_t max_lma_buf,`
			`const char16 *pfullsent, bool sort_by_psb);`

			`uint16 get_lemma_str(LemmaIdType id_lemma, char16 *str_buf, uint16 str_max);`

			`uint16 get_lemma_splids(LemmaIdType id_lemma, uint16 *splids,`
			`uint16 splids_max, bool arg_valid);`


			`// Extend a DMI node with a spelling id. ext_len is the length of the rows`
			`// to extend, actually, it is the size of the spelling string of splid.`
			`// return value can be 1 or 0.`
			`// 1 means a new DMI is filled in (dmi_pool_used_ is the next blank DMI in`
			`// the pool).`
			`// 0 means either the dmi node can not be extended with splid, or the splid`
			`// is a Shengmu id, which is only used to get lpi_items, or the result node`
			`// in DictTrie has no son, it is not nccessary to keep the new DMI.`
			`//`
			`// This function modifies the content of lpi_items_ and lpi_total_.`
			`// lpi_items_ is used to get the LmaPsbItem list, lpi_total_ returns the size.`
			`// The function's returned value has no relation with the value of lpi_num.`
			`//`
			`// If dmi == NULL, this function will extend the root node of DictTrie`
			`//`
			`// This function will not change dmi_nd_pool_used_. Please change it after`
			`// calling this function if necessary.`
			`//`
			`// The caller should guarantees that NULL != dep.`
			`size_t extend_dmi(DictExtPara dep, DictMatchInfo dmi_s);`

			`// Extend dmi for the composing phrase.`
			`size_t extend_dmi_c(DictExtPara dep, DictMatchInfo dmi_s);`

			// Extend a MatrixNode with the given LmaPsbItem list.
			// res_row is the destination row number.
			// This function does not change mtrx_nd_pool_used_. Please change it after
			// calling this function if necessary.
			// Always returns 0.
			size_t extend_mtrx_nd(MatrixNode *mtrx_nd, LmaPsbItem lpi_items[],
			    size_t lpi_num, PoolPosType dmi_fr, size_t res_row);


			// Try to find a DMI node at position step_to; the found DMI node should
			// match the given spelling id string.
			PoolPosType match_dmi(size_t step_to, uint16 spl_ids[], uint16 spl_id_num);

			// NOTE(review): presumably feeds one input character to the search and
			// reports success -- confirm against the implementation.
			bool add_char(char ch);
			// Saves the input char before add_char_qwerty() is called (see the
			// comment on add_char_qwerty).
			bool prepare_add_char(char ch);

			// Called after prepare_add_char(), so the input char has already been
			// saved.
			bool add_char_qwerty();

			// Prepare candidates, starting from the last fixed Hanzi position.
			void prepare_candidates();

			// Is the character at step pos a splitter character?
			// The caller guarantees that the position is valid.
			bool is_split_at(uint16 pos);

			`void fill_dmi(DictMatchInfo dmi, MileStoneHandle handles,`
			`PoolPosType dmi_fr,`
			`uint16 spl_id, uint16 node_num, unsigned char dict_level,`
			`bool splid_end_split, unsigned char splstr_len,`
			`unsigned char all_full_id);`

			// NOTE(review): presumably fills predict_buf (buf_len rows of at most
			// kMaxPredictSize characters plus terminator) with predictions for the
			// scis_num ids in fixed_scis_ids and returns how many were written --
			// confirm against the implementation.
			size_t inner_predict(const char16 fixed_scis_ids[], uint16 scis_num,
			    char16 predict_buf[][kMaxPredictSize + 1],
			    size_t buf_len);

			// Try to add the first candidate (candidate 0) to the user dictionary.
			bool try_add_cand0_to_userdict();

			// Add a user lemma to the user dictionary. This lemma is a subset of
			// candidate 0. lma_from is the index of the first lemma in lma_ids_, and
			// lma_num is the number of lemmas to be combined together as a new lemma.
			// The caller guarantees that the combined new lemma's length is less than
			// or equal to kMaxLemmaSize.
			bool add_lma_to_userdict(uint16 lma_from, uint16 lma_num, float score);

			// Update the dictionary frequencies.
			void update_dict_freq();

			// Debug helper for the DMI node at dmi_pos.
			// NOTE(review): nest_level presumably controls output nesting/indent --
			// confirm against the implementation.
			void debug_print_dmi(PoolPosType dmi_pos, uint16 nest_level);

			public:
			// Constructor.
			MatrixSearch();
			// Destructor.
			~MatrixSearch();

			// Initialize the engine from dictionary files.
			// fn_sys_dict and fn_usr_dict are C-string file names, matching the
			// `const char *fn_usr_dict` convention of init_fd() and
			// init_user_dictionary().
			bool init(const char *fn_sys_dict, const char *fn_usr_dict);

			// Variant of init() that takes the system dictionary as a file
			// descriptor.
			// NOTE(review): start_offset/length presumably delimit the dictionary
			// data within sys_fd -- confirm against the implementation.
			bool init_fd(int sys_fd, long start_offset, long length,
			    const char *fn_usr_dict);

			// Initialize the user dictionary from the file name fn_usr_dict.
			void init_user_dictionary(const char *fn_usr_dict);

			// Whether the user dictionary is enabled. Does not modify the object.
			bool is_user_dictionary_enabled() const;

			// NOTE(review): presumably sets the maximum spelling-string length and
			// the maximum Hanzi-string length accepted by the engine -- confirm.
			void set_max_lens(size_t max_sps_len, size_t max_hzs_len);

			// NOTE(review): presumably the counterpart of init()/init_fd() -- confirm
			// what is released.
			void close();

			// NOTE(review): presumably flushes cached dictionary data -- confirm
			// which caches are affected.
			void flush_cache();

			// Set the "xi an" switch to xi_an_enabled.
			void set_xi_an_switch(bool xi_an_enabled);

			// Get the current state of the "xi an" switch.
			bool get_xi_an_switch();

			// Reset the search space. Equivalent to reset_search(0).
			// If the engine has been inited, this always returns true.
			bool reset_search();

			// Search a Pinyin string of py_len characters.
			// The return value is the position successfully parsed.
			size_t search(const char *py, size_t py_len);

			// Used to delete something in the Pinyin string kept by the engine, and do
			// a re-search.
			// The return value is the new length of the Pinyin string kept by the
			// engine which is parsed successfully.
			// If is_pos_in_splid is false, pos indicates that the pos-th Pinyin
			// character needs to be deleted. If is_pos_in_splid is true, all Pinyin
			// characters for the pos-th spelling id need to be deleted.
			// If the deleted character(s) is just after a fixed lemma or sub lemma in
			// the composing phrase, clear_fixed_this_step indicates whether we need to
			// unlock the last fixed lemma or sub lemma.
			// If is_pos_in_splid is false, and the pos-th character is in the range
			// of the fixed lemmas or composing string, this function will do nothing
			// and just return the result of the previous search.
			size_t delsearch(size_t pos, bool is_pos_in_splid,
			    bool clear_fixed_this_step);

			// Get the number of candidates; called after search().
			size_t get_candidate_num();

			// Get the Pinyin string stored by the engine.
			// *decoded_len returns the length of the successfully decoded string.
			const char* get_pystr(size_t *decoded_len);

			// Get the spelling boundaries for the first sentence candidate.
			// The number of spellings is returned. The number of valid elements in
			// spl_start is one more than the return value, because the last one is
			// used to indicate the beginning of the next un-input spelling.
			// For the Pinyin "women", the returned value is 2 and spl_start is
			// [0, 2, 5].
			size_t get_spl_start(const uint16 *&spl_start);

			// Get one candidate string. If a full sentence candidate is available, it
			// will be the first one.
			char16* get_candidate(size_t cand_id, char16 *cand_str, size_t max_len);

			// Get the first candidate, which is a "full sentence".
			// If retstr_len is not NULL, it is used to return the string length.
			// If only_unfixed is true, only the unfixed part will be fetched.
			char16* get_candidate0(char16* cand_str, size_t max_len,
			    uint16 *retstr_len, bool only_unfixed);

			// Choose a candidate. The decoder will do a search after the fixed
			// position.
			size_t choose(size_t cand_id);

			// Cancel the last choosing operation, and return the new number of
			// choices.
			size_t cancel_last_choice();

			// Get the length of the fixed Hanzis.
			size_t get_fixedlen();

			// NOTE(review): presumably fills predict_buf (buf_len rows of at most
			// kMaxPredictSize characters plus terminator) with predictions that
			// follow fixed_buf and returns how many were written -- confirm against
			// the implementation.
			size_t get_predicts(const char16 fixed_buf[],
			    char16 predict_buf[][kMaxPredictSize + 1],
			    size_t buf_len);
			`};`
			`}`

			`#endif // PINYINIME_ANDPY_INCLUDE_MATRIXSEARCH_H__`