00001 
00002 
00003 
00004 
00005 
00006 
00007 
00008 
00009 #ifndef MECAB_MECAB_H
00010 #define MECAB_MECAB_H
00011 
00012 
/*
 * Descriptive record for one dictionary.
 *
 * Records chain to each other through `next`.  Notes tagged "(?)" are
 * inferred from the field names only and still need to be confirmed.
 */
struct mecab_dictionary_info_t {
  const char                     *filename; /* dictionary file name */
  const char                     *charset;  /* name of the dictionary's character set */
  unsigned int                    size;     /* (?) number of entries in the dictionary */
  int                             type;     /* (?) one of the MECAB_*_DIC codes */
  unsigned int                    lsize;    /* (?) size of the left-side attribute table */
  unsigned int                    rsize;    /* (?) size of the right-side attribute table */
  unsigned short                  version;  /* dictionary version number */
  struct mecab_dictionary_info_t *next;     /* next record in the chain */
};
00023 
/*
 * Link record between two mecab_node_t objects.
 *
 * struct mecab_node_t refers back to these records through its `rpath`
 * and `lpath` members.  Notes tagged "(?)" are inferred from the field
 * names only and still need to be confirmed.
 */
struct mecab_path_t {
  struct mecab_node_t *rnode; /* (?) node on the right-hand end of the link */
  struct mecab_path_t *rnext; /* (?) next link in the right-side chain */
  struct mecab_node_t *lnode; /* (?) node on the left-hand end of the link */
  struct mecab_path_t *lnext; /* (?) next link in the left-side chain */
  int                  cost;  /* integer cost attached to this link */
  float                prob;  /* (?) probability attached to this link */
};
00032 
/*
 * Link record between two mecab_learner_node_t objects.
 *
 * Mirrors mecab_path_t, but stores the cost as a double and carries a
 * read-only int vector instead of a probability.  Notes tagged "(?)" are
 * inferred from the field names only and still need to be confirmed.
 */
struct mecab_learner_path_t {
  struct mecab_learner_node_t *rnode;   /* (?) node on the right-hand end of the link */
  struct mecab_learner_path_t *rnext;   /* (?) next link in the right-side chain */
  struct mecab_learner_node_t *lnode;   /* (?) node on the left-hand end of the link */
  struct mecab_learner_path_t *lnext;   /* (?) next link in the left-side chain */
  double                       cost;    /* cost attached to this link */
  const int                   *fvector; /* (?) feature-id vector; length/terminator not visible here */
};
00041 
/*
 * Fixed-size dictionary token record.
 *
 * mecab_node_t and mecab_learner_node_t each hold a pointer to one of
 * these.  Notes tagged "(?)" are inferred from the field names only and
 * still need to be confirmed.
 */
struct mecab_token_t {
  unsigned short lcAttr;   /* (?) left-context attribute id */
  unsigned short rcAttr;   /* (?) right-context attribute id */
  unsigned short posid;    /* (?) part-of-speech id */
  short          wcost;    /* signed word cost */
  unsigned int   feature;  /* (?) offset/index locating the feature data */
  unsigned int   compound; /* NOTE(review): meaning not visible in this header */
};
00050 
/*
 * Node record; this is the type the mecab_*_tonode functions in this
 * header return a pointer to.
 *
 * Nodes reference each other through prev/next/enext/bnext and reference
 * mecab_path_t records through rpath/lpath.  Notes tagged "(?)" are
 * inferred from the field names only and still need to be confirmed.
 * Member order and types define the binary layout shared with the
 * library, so they must not be rearranged.
 */
struct mecab_node_t {
  struct mecab_node_t  *prev;            /* previous node */
  struct mecab_node_t  *next;            /* next node */
  struct mecab_node_t  *enext;           /* (?) next node ending at the same position */
  struct mecab_node_t  *bnext;           /* (?) next node beginning at the same position */
  struct mecab_path_t  *rpath;           /* (?) first link on the right side */
  struct mecab_path_t  *lpath;           /* (?) first link on the left side */
  struct mecab_node_t **begin_node_list; /* (?) table of nodes indexed by begin position */
  struct mecab_node_t **end_node_list;   /* (?) table of nodes indexed by end position */
  const char           *surface;         /* surface text; NOTE(review): may not be NUL-terminated, see `length` */
  const char           *feature;         /* feature string */
  unsigned int          id;              /* node id */
  unsigned short        length;          /* (?) length of `surface` */
  unsigned short        rlength;         /* (?) length including any skipped leading text */
  unsigned short        rcAttr;          /* (?) right-context attribute id */
  unsigned short        lcAttr;          /* (?) left-context attribute id */
  unsigned short        posid;           /* (?) part-of-speech id */
  unsigned char         char_type;       /* (?) character class id */
  unsigned char         stat;            /* (?) one of the MECAB_*_NODE codes */
  unsigned char         isbest;          /* (?) non-zero when the node is on the best result */
  unsigned int          sentence_length; /* (?) length of the whole input */
  float                 alpha;           /* (?) forward score */
  float                 beta;            /* (?) backward score */
  float                 prob;            /* (?) probability of this node */
  short                 wcost;           /* signed word cost */
  long                  cost;            /* (?) accumulated cost up to this node */
  struct mecab_token_t *token;           /* associated token record */
};
00079 
00080 
00081 
/*
 * Node record used together with mecab_learner_path_t.
 *
 * Compared with mecab_node_t it has no begin/end node tables, no
 * sentence_length and no prob; it adds `anext`, `wcost2` and `fvector`
 * and stores alpha/beta/wcost/cost as doubles.  Notes tagged "(?)" are
 * inferred from the field names only and still need to be confirmed.
 * Member order and types define the binary layout shared with the
 * library, so they must not be rearranged.
 */
struct mecab_learner_node_t {
  struct mecab_learner_node_t *prev;      /* previous node */
  struct mecab_learner_node_t *next;      /* next node */
  struct mecab_learner_node_t *enext;     /* (?) next node ending at the same position */
  struct mecab_learner_node_t *bnext;     /* (?) next node beginning at the same position */
  struct mecab_learner_path_t *rpath;     /* (?) first link on the right side */
  struct mecab_learner_path_t *lpath;     /* (?) first link on the left side */
  struct mecab_learner_node_t *anext;     /* NOTE(review): purpose not visible in this header */
  const char                  *surface;   /* surface text; NOTE(review): may not be NUL-terminated, see `length` */
  const char                  *feature;   /* feature string */
  unsigned int                 id;        /* node id */
  unsigned short               length;    /* (?) length of `surface` */
  unsigned short               rlength;   /* (?) length including any skipped leading text */
  unsigned short               rcAttr;    /* (?) right-context attribute id */
  unsigned short               lcAttr;    /* (?) left-context attribute id */
  unsigned short               posid;     /* (?) part-of-speech id */
  unsigned char                char_type; /* (?) character class id */
  unsigned char                stat;      /* (?) one of the MECAB_*_NODE codes */
  unsigned char                isbest;    /* (?) non-zero when the node is on the best result */
  double                       alpha;     /* (?) forward score */
  double                       beta;      /* (?) backward score */
  short                        wcost2;    /* (?) integer word cost, cf. mecab_node_t.wcost */
  double                       wcost;     /* word cost as a double */
  double                       cost;      /* (?) accumulated cost up to this node */
  const int                   *fvector;   /* (?) feature-id vector; length/terminator not visible here */
  struct mecab_token_t        *token;     /* associated token record */
};
00109 
/*
 * Node kind codes.
 * NOTE(review): presumably the values stored in the `stat` member of the
 * node structs -- confirm against the implementation.
 */
#define MECAB_NOR_NODE  0
#define MECAB_UNK_NODE  1
#define MECAB_BOS_NODE  2
#define MECAB_EOS_NODE  3

/*
 * Dictionary kind codes, listed in value order.
 * NOTE(review): presumably the values stored in
 * mecab_dictionary_info_t.type -- confirm against the implementation.
 */
#define MECAB_SYS_DIC   0
#define MECAB_USR_DIC   1
#define MECAB_UNK_DIC   2
00118 
00119 
00120 #ifdef __cplusplus
00121 #include <cstdio>
00122 #else
00123 #include <stdio.h>
00124 #endif
00125 
/*
 * MECAB_DLL_EXTERN: storage/visibility prefix for every exported symbol.
 *
 * On _WIN32 it expands to __declspec(dllexport) when DLL_EXPORT is defined
 * and to __declspec(dllimport) otherwise; on every other platform it is
 * plain `extern`.  This setup (including <windows.h>) is done before the
 * extern "C" block is opened so that no system header is pulled in under
 * C linkage.
 */
#ifdef _WIN32
#include <windows.h>
#  ifdef DLL_EXPORT
#    define MECAB_DLL_EXTERN  __declspec(dllexport)
#  else
#    define MECAB_DLL_EXTERN  __declspec(dllimport)
#  endif
#endif

#ifndef MECAB_DLL_EXTERN
#  define MECAB_DLL_EXTERN extern
#endif

#ifdef __cplusplus
extern "C" {
#endif

  /* Short names for the struct tags; mecab_t is opaque in this header. */
  typedef struct mecab_t                 mecab_t;
  typedef struct mecab_dictionary_info_t mecab_dictionary_info_t;
  typedef struct mecab_node_t            mecab_node_t;
  typedef struct mecab_learner_node_t    mecab_learner_node_t;
  typedef struct mecab_path_t            mecab_path_t;
  typedef struct mecab_learner_path_t    mecab_learner_path_t;
  typedef struct mecab_token_t           mecab_token_t;

  /* The C prototypes below are hidden when SWIG is defined. */
#ifndef SWIG

  /* Entry point taking a main()-style argument vector. */
  MECAB_DLL_EXTERN int           mecab_do(int argc, char **argv);

  /*
   * Instance management.  mecab_new takes a main()-style argument vector,
   * mecab_new2 a single argument string.
   * NOTE(review): presumably both return NULL on failure and the result
   * must be released with mecab_destroy -- confirm.
   */
  MECAB_DLL_EXTERN mecab_t*      mecab_new(int argc, char **argv);
  MECAB_DLL_EXTERN mecab_t*      mecab_new2(const char *arg);
  /* Declared with (void) so that C callers get a real prototype. */
  MECAB_DLL_EXTERN const char*   mecab_version(void);
  MECAB_DLL_EXTERN const char*   mecab_strerror(mecab_t *mecab);
  MECAB_DLL_EXTERN void          mecab_destroy(mecab_t *mecab);

  /* Getter/setter pairs for per-instance settings. */
  MECAB_DLL_EXTERN int           mecab_get_partial(mecab_t *mecab);
  MECAB_DLL_EXTERN void          mecab_set_partial(mecab_t *mecab, int partial);
  MECAB_DLL_EXTERN float         mecab_get_theta(mecab_t *mecab);
  MECAB_DLL_EXTERN void          mecab_set_theta(mecab_t *mecab, float theta);
  MECAB_DLL_EXTERN int           mecab_get_lattice_level(mecab_t *mecab);
  MECAB_DLL_EXTERN void          mecab_set_lattice_level(mecab_t *mecab, int level);
  MECAB_DLL_EXTERN int           mecab_get_all_morphs(mecab_t *mecab);
  MECAB_DLL_EXTERN void          mecab_set_all_morphs(mecab_t *mecab, int all_morphs);

  /*
   * Parsing functions.  Naming pattern visible in the signatures:
   *   - no suffix: input is `str` alone (NOTE(review): presumably
   *     NUL-terminated -- confirm);
   *   - suffix 2:  input is `str` plus an explicit `len`;
   *   - suffix 3:  as suffix 2, with a caller-supplied output buffer
   *                `ostr` of `olen` bytes.
   * NOTE(review): ownership/lifetime of the returned const pointers is not
   * visible here; presumably they belong to the mecab_t instance -- confirm
   * before caching or freeing them.
   */
  MECAB_DLL_EXTERN const char*   mecab_sparse_tostr(mecab_t *mecab, const char *str);
  MECAB_DLL_EXTERN const char*   mecab_sparse_tostr2(mecab_t *mecab, const char *str, size_t len);
  MECAB_DLL_EXTERN char*         mecab_sparse_tostr3(mecab_t *mecab, const char *str, size_t len,
                                                     char *ostr, size_t olen);
  MECAB_DLL_EXTERN const mecab_node_t* mecab_sparse_tonode(mecab_t *mecab, const char *str);
  MECAB_DLL_EXTERN const mecab_node_t* mecab_sparse_tonode2(mecab_t *mecab, const char *str, size_t len);

  /* N-best variants: `N` is passed alongside the input. */
  MECAB_DLL_EXTERN const char*   mecab_nbest_sparse_tostr(mecab_t *mecab, size_t N, const char *str);
  MECAB_DLL_EXTERN const char*   mecab_nbest_sparse_tostr2(mecab_t *mecab, size_t N,
                                                           const char *str, size_t len);
  MECAB_DLL_EXTERN char*         mecab_nbest_sparse_tostr3(mecab_t *mecab, size_t N,
                                                           const char *str, size_t len,
                                                           char *ostr, size_t olen);

  /*
   * Incremental N-best interface: an init call followed by repeated
   * mecab_nbest_next_* calls.
   * NOTE(review): the meaning of the int result of the init functions is
   * not visible here -- confirm.
   */
  MECAB_DLL_EXTERN int           mecab_nbest_init(mecab_t *mecab, const char *str);
  MECAB_DLL_EXTERN int           mecab_nbest_init2(mecab_t *mecab, const char *str, size_t len);
  MECAB_DLL_EXTERN const char*   mecab_nbest_next_tostr(mecab_t *mecab);
  MECAB_DLL_EXTERN char*         mecab_nbest_next_tostr2(mecab_t *mecab, char *ostr, size_t olen);
  MECAB_DLL_EXTERN const mecab_node_t* mecab_nbest_next_tonode(mecab_t *mecab);

  /* Render one node as text / access the dictionary records. */
  MECAB_DLL_EXTERN const char*   mecab_format_node(mecab_t *mecab, const mecab_node_t *node);
  MECAB_DLL_EXTERN const mecab_dictionary_info_t* mecab_dictionary_info(mecab_t *mecab);

  /* Further entry points taking a main()-style argument vector. */
  MECAB_DLL_EXTERN int           mecab_dict_index(int argc, char **argv);
  MECAB_DLL_EXTERN int           mecab_dict_gen(int argc, char **argv);
  MECAB_DLL_EXTERN int           mecab_cost_train(int argc, char **argv);
  MECAB_DLL_EXTERN int           mecab_system_eval(int argc, char **argv);
  MECAB_DLL_EXTERN int           mecab_test_gen(int argc, char **argv);
#endif

#ifdef __cplusplus
}
#endif
00199 
00200 
00201 #ifdef __cplusplus
00202 
00203 namespace MeCab {
00204   typedef struct mecab_dictionary_info_t DictionaryInfo;
00205   typedef struct mecab_path_t            Path;
00206   typedef struct mecab_node_t            Node;
00207   typedef struct mecab_learner_path_t    LearnerPath;
00208   typedef struct mecab_learner_node_t    LearnerNode;
00209   typedef struct mecab_token_t           Token;
00210 
00211   class Tagger {
00212   public:
00213 
00214 #ifndef SWIG
00215     virtual const char* parse(const char *str, size_t len, char *ostr, size_t olen) = 0;
00216     virtual const char* parse(const char *str, size_t len)                          = 0;
00217     virtual const Node* parseToNode(const char *str, size_t len)                    = 0;
00218     virtual const char* parseNBest(size_t N, const char *str, size_t len)           = 0;
00219     virtual bool  parseNBestInit(const char *str, size_t len)                       = 0;
00220 #endif
00221 
00222     virtual const char* parse(const char *str)                = 0;
00223     virtual const Node* parseToNode(const char *str)          = 0;
00224     virtual const char* parseNBest(size_t N, const char *str) = 0;
00225     virtual bool  parseNBestInit(const char *str)             = 0;
00226     virtual const Node* nextNode()                            = 0;
00227     virtual const char* next()                                = 0;
00228     virtual const char* formatNode(const Node *node)          = 0;
00229 
00230     
00231     virtual bool  partial() const                             = 0;
00232     virtual void  set_partial(bool partial)                   = 0;
00233     virtual float theta() const                               = 0;
00234     virtual void  set_theta(float theta)                      = 0;
00235     virtual int   lattice_level() const                       = 0;
00236     virtual void  set_lattice_level(int level)                = 0;
00237     virtual bool  all_morphs() const                          = 0;
00238     virtual void  set_all_morphs(bool all_morphs)             = 0;
00239 
00240 #ifndef SWIG
00241     virtual const char* next(char *ostr , size_t olen)                        = 0;
00242     virtual const char* parseNBest(size_t N, const char *str,
00243                                    size_t len, char *ostr, size_t olen)       = 0;
00244     virtual const char* formatNode(const Node *node, char *ostr, size_t olen) = 0;
00245 #endif
00246 
00247     virtual const DictionaryInfo* dictionary_info() const = 0;
00248 
00249     virtual const char* what() = 0;
00250 
00251     virtual ~Tagger() {}
00252 
00253 #ifndef SIWG
00254     static Tagger* create(int argc, char **argv);
00255     static Tagger* create(const char *arg);
00256 #endif
00257 
00258     static const char *version();
00259   };
00260 
00261   
00262   MECAB_DLL_EXTERN Tagger *createTagger(int argc, char **argv);
00263   MECAB_DLL_EXTERN Tagger *createTagger(const char *arg);
00264   MECAB_DLL_EXTERN const char* getTaggerError();
00265 }
00266 
00267 #endif
00268 #endif