前回のつづき。ざっと。軽く。C言語。

コードを見る

sqlite-autoconf-3170000.tar.gzを解凍する
sqlite3.cファイルをテキストエディタで開く(7.1MBもあるので重い)
tokenizerで文字列検索する

以下のようなコードが見つかった。

sqlite3_tokenizer_module 構造体

struct sqlite3_tokenizer_module {

  /*
  ** Version number of this structure. Expected to be either 0 or 1.
  */
  int iVersion;

  /*
  ** Constructor for a tokenizer object. argv[] carries the arguments that
  ** were given to the "tokenizer" clause of the CREATE VIRTUAL TABLE
  ** statement which created the fts3 table. Given:
  **
  **   CREATE .. USING fts3( ... , tokenizer <tokenizer-name> arg1 arg2)
  **
  ** argc is 2 and argv[] points at the strings "arg1" and "arg2".
  **
  ** The return value is SQLITE_OK (0) or an SQLite error code. When
  ** SQLITE_OK is returned, *ppTokenizer must point at the freshly created
  ** tokenizer structure. This callback must not initialize the generic
  ** sqlite3_tokenizer.pModule member; the caller does that.
  */
  int (*xCreate)(
    int argc,                           /* Number of entries in argv[] */
    const char *const *argv,            /* Arguments for the tokenizer */
    sqlite3_tokenizer **ppTokenizer     /* OUT: The new tokenizer */
  );

  /*
  ** Destructor for a tokenizer object. The fts3 module invokes it exactly
  ** once per successful xCreate() call.
  */
  int (*xDestroy)(sqlite3_tokenizer *pTokenizer);

  /*
  ** Open a cursor that tokenizes an input buffer. Keeping the input buffer
  ** valid until the cursor has been closed with xClose() is the caller's
  ** job.
  */
  int (*xOpen)(
    sqlite3_tokenizer *pTokenizer,       /* The tokenizer to use */
    const char *pInput, int nBytes,      /* Buffer to tokenize */
    sqlite3_tokenizer_cursor **ppCursor  /* OUT: The new cursor */
  );

  /*
  ** Close a tokenizer cursor. The fts3 module invokes it exactly once per
  ** successful xOpen() call.
  */
  int (*xClose)(sqlite3_tokenizer_cursor *pCursor);

  /*
  ** Fetch the next token from cursor pCursor. Possible results:
  **
  **   SQLITE_OK    - a token was produced and the "OUT" parameters are set
  **   SQLITE_DONE  - the end of the input buffer was reached
  **   other        - an SQLite error code
  **
  ** *ppToken points at the normalized token text (i.e. after case-folding
  ** and/or stemming) and *pnBytes is the size of that text in bytes.
  ** *piStartOffset and *piEndOffset locate the source text of the token
  ** inside the input buffer: the index of its first byte, and the index of
  ** the first byte past its end, respectively.
  **
  ** The storage *ppToken points at belongs to the tokenizer implementation
  ** and only has to stay valid until the next xNext() or xClose() call.
  */
  /* TODO(shess) the current implementation needs pInput to be
  ** nul-terminated. Either fix that, or replace pInput/nBytes with
  ** zInput.
  */
  int (*xNext)(
    sqlite3_tokenizer_cursor *pCursor,   /* Cursor to advance */
    const char **ppToken, int *pnBytes,  /* OUT: Normalized token text */
    int *piStartOffset,  /* OUT: Byte offset where the token starts */
    int *piEndOffset,    /* OUT: Byte offset just past the token */
    int *piPosition      /* OUT: Count of tokens returned before this one */
  );

  /***********************************************************************
  ** The methods that follow exist only when iVersion>=1.
  */

  /*
  ** Set the language id used by a tokenizer cursor.
  */
  int (*xLanguageid)(sqlite3_tokenizer_cursor *pCsr, int iLangid);
};

sqlite3_tokenizer_module構造体の中に関数ポインタがたくさんある。このうち以下の部分がトークナイザと思われる。

  int (*xCreate)(
    int argc,                           /* Size of argv array */
    const char *const*argv,             /* Tokenizer argument strings */
    sqlite3_tokenizer **ppTokenizer     /* OUT: Created tokenizer */
  );

この戻り値と引数を持った関数を実装し、この関数ポインタにセットしてやれば独自トークナイザを実装できそう。

sqlite3_tokenizer_module構造体は以下のようなメソッドを含むclassもどきのようなものなのだろう。たぶん。

Create
Destroy
Open
Close
Next
Languageid

おそらく、これらのフレームワークに沿って実装する必要があるのだろう。

すぐ下に以下のような構造体もあった。

/*
** Generic part of a tokenizer object: just a pointer to its method table.
*/
struct sqlite3_tokenizer {
  /* Method table (module) this tokenizer belongs to */
  const sqlite3_tokenizer_module *pModule;
  /* Concrete tokenizers usually append their own members after this */
};

/*
** Generic part of a tokenizer cursor: just a pointer to its tokenizer.
*/
struct sqlite3_tokenizer_cursor {
  /* The tokenizer that this cursor was opened on */
  sqlite3_tokenizer *pTokenizer;
  /* Concrete tokenizers usually append their own members after this */
};

sqlite3_tokenizerはただのラッパ。sqlite3_tokenizer_cursorはcursor。pythonの標準SQLiteAPIで見覚えがある。たぶんファイルポインタのようなものなのだろう。データの位置を指し示す的な。知らんけど。

トークナイザ関数名(FTS3)

sqlite3_tokenizer_moduleで文字列検索してみた。

/*
** Prototypes of the accessors for the tokenizer modules built into FTS3.
** Each one takes a single out-parameter, ppModule.
** NOTE(review): only the declarations are visible here; presumably each
** function stores the address of its module structure in *ppModule -
** confirm against the definitions.
**
** The unicode accessor is declared unless SQLITE_DISABLE_FTS3_UNICODE is
** defined. The ICU accessor is declared only if SQLITE_ENABLE_ICU is
** defined.
*/
SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);
SQLITE_PRIVATE void sqlite3Fts3PorterTokenizerModule(sqlite3_tokenizer_module const**ppModule);
#ifndef SQLITE_DISABLE_FTS3_UNICODE
SQLITE_PRIVATE void sqlite3Fts3UnicodeTokenizer(sqlite3_tokenizer_module const**ppModule);
#endif
#ifdef SQLITE_ENABLE_ICU
SQLITE_PRIVATE void sqlite3Fts3IcuTokenizerModule(sqlite3_tokenizer_module const**ppModule);
#endif

SQLite3に組み込まれたトークナイザは以下の4種類があるらしい。

SimpleTokenizer
PorterTokenizer
UnicodeTokenizer
IcuTokenizer

それぞれが何なのかさっぱりわからないが、検索キーワードになりそう。トークナイザの実装関数名っぽい。

FTSバージョン

また、FTSのバージョン値が気になる。Fts3とある。

SQLite FTS3 and FTS4 Extensions
Google翻訳

FTS3とFTS4はほぼ同じです。彼らは共通のコードの大部分を共有し、それらのインターフェースは同じです。

FTS3と4のインタフェースは同じらしい。同一の関数を使用するという意味だろう。だからコード上でFTS3とあるものはFTS4と共通だと。

FTS4はFTS3の強化版です。 FTS3はSQLite バージョン3.5.0 （2007-09-04）以降に公開されています。FTS4の拡張機能はSQLite バージョン3.7.4 （2010-12-07）で追加されました。

私の環境でapt-getしたSQLite3のバージョンは3.8.2 (2013-12-06)だった。FTS4が実装されているはず。ただし、FTSを有効にして再コンパイルする必要があるのかもしれない。

FTS4は、マッチ操作の結果のランク付けに役立つ強化されたmatchinfo（）出力を提供します。

FTS4は検索順位付けしてくれるらしい。ありがたい。しかしFTS4は3よりもディスク容量を多く消費するらしい。

FTS4テーブルはFTS3を使用して作成された同等のテーブルより多くのディスクスペースを消費する可能性があります。通常、オーバーヘッドは1〜2％以下ですが、FTSテーブルに格納されたドキュメントが非常に小さい場合は、10％もの高さになることがあります。

ディスク容量と検索速度&品質のトレードオフ。

FTS5は異なるインタフェースと思われる。とりあえずFTS3(FTS4)について見てみる。

sqlite3Fts3Init()

SQLITE_PRIVATE int sqlite3Fts3Init(sqlite3 *db){
  int rc = SQLITE_OK;
  Fts3Hash *pHash = 0;
  const sqlite3_tokenizer_module *pSimple = 0;
  const sqlite3_tokenizer_module *pPorter = 0;
#ifndef SQLITE_DISABLE_FTS3_UNICODE
  const sqlite3_tokenizer_module *pUnicode = 0;
#endif

#ifdef SQLITE_ENABLE_ICU
  const sqlite3_tokenizer_module *pIcu = 0;
  sqlite3Fts3IcuTokenizerModule(&pIcu);
#endif

#ifndef SQLITE_DISABLE_FTS3_UNICODE
  sqlite3Fts3UnicodeTokenizer(&pUnicode);
#endif
  ...

コンパイル時のオプション(`SQLITE_DISABLE_FTS3_UNICODE`や`SQLITE_ENABLE_ICU`の定義)で、組み込むトークナイザを変更できるのだろうか。標準トークナイザは`sqlite3Fts3UnicodeTokenizer`ということだろうか。わざわざ無効にしないといけないように見えるから。

## sqlite3Fts3OpenTokenizer()

/*
** Open a tokenizer cursor on the n bytes at z using tokenizer pTokenizer.
**
** The module's xOpen() method is called first. If it succeeds, the new
** cursor's pTokenizer member is set and, for modules with iVersion>=1,
** xLanguageid() is called with iLangid. Should xLanguageid() fail, the
** cursor is closed again.
**
** *ppCsr receives the new cursor, or 0 if no cursor was opened. The return
** value is SQLITE_OK or the error code reported by xOpen()/xLanguageid().
*/
SQLITE_PRIVATE int sqlite3Fts3OpenTokenizer(
  sqlite3_tokenizer *pTokenizer,          /* Tokenizer to open a cursor on */
  int iLangid,                            /* Language id for the cursor */
  const char *z,                          /* Input buffer */
  int n,                                  /* Size of input buffer in bytes */
  sqlite3_tokenizer_cursor **ppCsr        /* OUT: New cursor (or 0) */
){
  sqlite3_tokenizer_module const *pModule = pTokenizer->pModule;
  sqlite3_tokenizer_cursor *pCsr = 0;
  int rc;

  rc = pModule->xOpen(pTokenizer, z, n, &pCsr);
  assert( rc==SQLITE_OK || pCsr==0 );
  if( rc==SQLITE_OK ){
    pCsr->pTokenizer = pTokenizer;
    /* xLanguageid only exists in version 1 (and later) modules */
    if( pModule->iVersion>=1 ){
      rc = pModule->xLanguageid(pCsr, iLangid);
      if( rc!=SQLITE_OK ){
        pModule->xClose(pCsr);
        pCsr = 0;
      }
    }
  }
  *ppCsr = pCsr;
  return rc;
}

カーソルのインスタンスをセットしている。トークナイザを使いはじめるときに必ず呼ぶ処理なのだろう。

## getNextToken()

よくわからない。引数に知らない構造体もある。

static int getNextToken( ParseContext pParse, / fts3 query parse context / int iCol, / Value for Fts3Phrase.iColumn / const char z, int n, / Input string / Fts3Expr ppExpr, / OUT: expression / int pnConsumed / OUT: Number of bytes consumed / ){ sqlite3_tokenizer pTokenizer = pParse->pTokenizer; sqlite3_tokenizer_module const pModule = pTokenizer->pModule; int rc; sqlite3_tokenizer_cursor pCursor; Fts3Expr *pRet = 0; int i = 0;

/ Set variable i to the maximum number of bytes of input to tokenize. / for(i=0; i<n; i++){ if( sqlite3_fts3_enable_parentheses && (z[i]==‘(’ || z[i]==‘)’) ) break; if( z[i]==‘“’ ) break; }

pnConsumed = i; rc = sqlite3Fts3OpenTokenizer(pTokenizer, pParse->iLangid, z, i, &pCursor); if( rc==SQLITE_OK ){ const char zToken; int nToken = 0, iStart = 0, iEnd = 0, iPosition = 0; int nByte; / total space to allocate /

rc = pModule->xNext(pCursor, &zToken, &nToken, &iStart, &iEnd, &iPosition);
if( rc==SQLITE_OK ){
  nByte = sizeof(Fts3Expr) + sizeof(Fts3Phrase) + nToken;
  pRet = (Fts3Expr *)fts3MallocZero(nByte);
  if( !pRet ){
    rc = SQLITE_NOMEM;
  }else{
    pRet->eType = FTSQUERY_PHRASE;
    pRet->pPhrase = (Fts3Phrase *)&pRet[1];
    pRet->pPhrase->nToken = 1;
    pRet->pPhrase->iColumn = iCol;
    pRet->pPhrase->aToken[0].n = nToken;
    pRet->pPhrase->aToken[0].z = (char *)&pRet->pPhrase[1];
    memcpy(pRet->pPhrase->aToken[0].z, zToken, nToken);

    if( iEnd<n && z[iEnd]=='*' ){
      pRet->pPhrase->aToken[0].isPrefix = 1;
      iEnd++;
    }

    while( 1 ){
      if( !sqlite3_fts3_enable_parentheses 
       && iStart>0 && z[iStart-1]=='-' 
      ){
        pParse->isNot = 1;
        iStart--;
      }else if( pParse->bFts4 && iStart>0 && z[iStart-1]=='^' ){
        pRet->pPhrase->aToken[0].bFirst = 1;
        iStart--;
      }else{
        break;
      }
    }

  }
  *pnConsumed = iEnd;
}else if( i && rc==SQLITE_DONE ){
  rc = SQLITE_OK;
}

pModule->xClose(pCursor);

}

*ppExpr = pRet; return rc; }

## getNextString()

よくわからない。引数に知らない構造体もある。

static int getNextString( ParseContext pParse, / fts3 query parse context / const char zInput, int nInput, / Input string / Fts3Expr ppExpr / OUT: expression / ){ sqlite3_tokenizer pTokenizer = pParse->pTokenizer; sqlite3_tokenizer_module const pModule = pTokenizer->pModule; int rc; Fts3Expr p = 0; sqlite3_tokenizer_cursor pCursor = 0; char *zTemp = 0; int nTemp = 0;

const int nSpace = sizeof(Fts3Expr) + sizeof(Fts3Phrase); int nToken = 0;

/ The final Fts3Expr data structure, including the Fts3Phrase, Fts3PhraseToken structures token buffers are all stored as a single allocation so that the expression can be freed with a single call to sqlite3_free(). Setting this up requires a two pass approach. The first pass, in the block below, uses a tokenizer cursor to iterate through the tokens in the expression. This pass uses fts3ReallocOrFree() to assemble data in two dynamic buffers: Buffer p: Points to the Fts3Expr structure, followed by the Fts3Phrase structure, followed by the array of Fts3PhraseToken structures. This pass only populates the Fts3PhraseToken array. Buffer zTemp: Contains copies of all tokens. The second pass, in the block that begins “if( rc==SQLITE_DONE )” below, appends buffer zTemp to buffer p, and fills in the Fts3Expr and Fts3Phrase ** structures. / rc = sqlite3Fts3OpenTokenizer( pTokenizer, pParse->iLangid, zInput, nInput, &pCursor); if( rc==SQLITE_OK ){ int ii; for(ii=0; rc==SQLITE_OK; ii++){ const char zByte; int nByte = 0, iBegin = 0, iEnd = 0, iPos = 0; rc = pModule->xNext(pCursor, &zByte, &nByte, &iBegin, &iEnd, &iPos); if( rc==SQLITE_OK ){ Fts3PhraseToken pToken;

    p = fts3ReallocOrFree(p, nSpace + ii*sizeof(Fts3PhraseToken));
    if( !p ) goto no_mem;

    zTemp = fts3ReallocOrFree(zTemp, nTemp + nByte);
    if( !zTemp ) goto no_mem;

    assert( nToken==ii );
    pToken = &((Fts3Phrase *)(&p[1]))->aToken[ii];
    memset(pToken, 0, sizeof(Fts3PhraseToken));

    memcpy(&zTemp[nTemp], zByte, nByte);
    nTemp += nByte;

    pToken->n = nByte;
    pToken->isPrefix = (iEnd<nInput && zInput[iEnd]=='*');
    pToken->bFirst = (iBegin>0 && zInput[iBegin-1]=='^');
    nToken = ii+1;
  }
}

pModule->xClose(pCursor);
pCursor = 0;

}

if( rc==SQLITE_DONE ){ int jj; char *zBuf = 0;

p = fts3ReallocOrFree(p, nSpace + nToken*sizeof(Fts3PhraseToken) + nTemp);
if( !p ) goto no_mem;
memset(p, 0, (char *)&(((Fts3Phrase *)&p[1])->aToken[0])-(char *)p);
p->eType = FTSQUERY_PHRASE;
p->pPhrase = (Fts3Phrase *)&p[1];
p->pPhrase->iColumn = pParse->iDefaultCol;
p->pPhrase->nToken = nToken;

zBuf = (char *)&p->pPhrase->aToken[nToken];
if( zTemp ){
  memcpy(zBuf, zTemp, nTemp);
  sqlite3_free(zTemp);
}else{
  assert( nTemp==0 );
}

for(jj=0; jj<p->pPhrase->nToken; jj++){
  p->pPhrase->aToken[jj].z = zBuf;
  zBuf += p->pPhrase->aToken[jj].n;
}
rc = SQLITE_OK;

}

*ppExpr = p; return rc; no_mem:

if( pCursor ){ pModule->xClose(pCursor); } sqlite3_free(zTemp); sqlite3_free(p); *ppExpr = 0; return SQLITE_NOMEM; }

## queryTestTokenizer()

`test`と名のつくものは無視していいかもしれない。

/*
** Look up the tokenizer module registered under zName by running
** "SELECT fts3_tokenizer(?)" on db.
**
** *pp is first set to 0. If the statement yields a row whose first column
** is a BLOB, the first sizeof(*pp) bytes of that blob are copied into *pp,
** i.e. the blob is treated as holding the module pointer.
**
** Returns the sqlite3_prepare_v2() error code if preparing fails,
** otherwise the result of sqlite3_finalize(). An SQLITE_OK return can
** therefore still leave *pp set to 0.
*/
static int queryTestTokenizer(
  sqlite3 *db,
  const char *zName,
  const sqlite3_tokenizer_module **pp
){
  int rc;
  sqlite3_stmt *pStmt;
  const char zSql[] = "SELECT fts3_tokenizer(?)";

  *pp = 0;
  rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
  if( rc!=SQLITE_OK ){
    return rc;
  }

  sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
  if( SQLITE_ROW==sqlite3_step(pStmt) ){
    if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
      memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
    }
  }

  return sqlite3_finalize(pStmt);
}

## fts3ExprTest()

`test`と名のつくものは無視していいかもしれない。

/*
** SQL function implementation used for testing the expression parser.
**
**   argv[0]   name of the tokenizer module to use
**   argv[1]   expression text to parse
**   argv[2..] column names
**
** The expression is parsed with sqlite3Fts3ExprParse() when the function
** was registered with non-zero user data, otherwise with
** fts3ExprParseUnbalanced(). The result of the SQL function is the parsed
** expression rendered by exprToString(), or an error.
**
** NOTE(review): on a parse error sqlite3Fts3ExprFree(pExpr) is reached
** twice. This is only safe if the parse functions leave pExpr set to 0 on
** failure - confirm.
*/
static void fts3ExprTest(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  sqlite3_tokenizer_module const *pModule = 0;
  sqlite3_tokenizer *pTokenizer = 0;
  int rc;
  char **azCol = 0;
  const char *zExpr;
  int nExpr;
  int nCol;
  int ii;
  Fts3Expr *pExpr;
  char *zBuf = 0;
  sqlite3 *db = sqlite3_context_db_handle(context);

  if( argc<3 ){
    sqlite3_result_error(context,
        "Usage: fts3_exprtest(tokenizer, expr, col1, ...", -1
    );
    return;
  }

  rc = queryTestTokenizer(db,
                          (const char *)sqlite3_value_text(argv[0]), &pModule);
  if( rc==SQLITE_NOMEM ){
    sqlite3_result_error_nomem(context);
    goto exprtest_out;
  }else if( !pModule ){
    sqlite3_result_error(context, "No such tokenizer module", -1);
    goto exprtest_out;
  }

  rc = pModule->xCreate(0, 0, &pTokenizer);
  assert( rc==SQLITE_NOMEM || rc==SQLITE_OK );
  if( rc==SQLITE_NOMEM ){
    sqlite3_result_error_nomem(context);
    goto exprtest_out;
  }
  /* xCreate() leaves pModule unset; the caller must fill it in */
  pTokenizer->pModule = pModule;

  zExpr = (const char *)sqlite3_value_text(argv[1]);
  nExpr = sqlite3_value_bytes(argv[1]);
  nCol = argc-2;
  azCol = (char **)sqlite3_malloc(nCol*sizeof(char *));
  if( !azCol ){
    sqlite3_result_error_nomem(context);
    goto exprtest_out;
  }
  for(ii=0; ii<nCol; ii++){
    azCol[ii] = (char *)sqlite3_value_text(argv[ii+2]);
  }

  if( sqlite3_user_data(context) ){
    char *zDummy = 0;
    rc = sqlite3Fts3ExprParse(
        pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr, &zDummy
    );
    assert( rc==SQLITE_OK || pExpr==0 );
    sqlite3_free(zDummy);
  }else{
    rc = fts3ExprParseUnbalanced(
        pTokenizer, 0, azCol, 0, nCol, nCol, zExpr, nExpr, &pExpr
    );
  }

  if( rc!=SQLITE_OK && rc!=SQLITE_NOMEM ){
    sqlite3Fts3ExprFree(pExpr);
    sqlite3_result_error(context, "Error parsing expression", -1);
  }else if( rc==SQLITE_NOMEM || !(zBuf = exprToString(pExpr, 0)) ){
    sqlite3_result_error_nomem(context);
  }else{
    sqlite3_result_text(context, zBuf, -1, SQLITE_TRANSIENT);
    sqlite3_free(zBuf);
  }

  sqlite3Fts3ExprFree(pExpr);

exprtest_out:
  if( pModule && pTokenizer ){
    rc = pModule->xDestroy(pTokenizer);
  }
  sqlite3_free(azCol);
}

## sqlite3_tokenizer_module構造体のインスタンス

/*
** Method table of the porter tokenizer. The initializers are positional,
** in the member order of struct sqlite3_tokenizer_module.
*/
static const sqlite3_tokenizer_module porterTokenizerModule = {
  0,                /* iVersion */
  porterCreate,     /* xCreate */
  porterDestroy,    /* xDestroy */
  porterOpen,       /* xOpen */
  porterClose,      /* xClose */
  porterNext,       /* xNext */
  0                 /* xLanguageid */
};

トークナイザの1つ。ポータートークナイザ。

## sqlite3Fts3PorterTokenizerModule()

/*
** Store the address of the porter tokenizer's method table in *ppModule.
** The signature matches the prototype, which takes a pointer to a pointer.
*/
SQLITE_PRIVATE void sqlite3Fts3PorterTokenizerModule(
  sqlite3_tokenizer_module const**ppModule    /* OUT: porter module */
){
  *ppModule = &porterTokenizerModule;
}

単にアドレスをコピーしているだけ。わざわざ関数にするのはなぜだ。使われているところも読まないとわからないか。

## sqlite3Fts3InitTokenizer()

トークナイザの初期化らしい。よくわからないが、メモリ確保やエラー確認をしているのだろうか。

SQLITE_PRIVATE int sqlite3Fts3InitTokenizer( Fts3Hash pHash, / Tokenizer hash table / const char zArg, / Tokenizer name / sqlite3_tokenizer ppTok, / OUT: Tokenizer (if applicable) / char pzErr / OUT: Set to malloced error message / ){ int rc; char z = (char )zArg; int n = 0; char zCopy; char zEnd; / Pointer to nul-term of zCopy / sqlite3_tokenizer_module *m;

zCopy = sqlite3_mprintf(“%s”, zArg); if( !zCopy ) return SQLITE_NOMEM; zEnd = &zCopy[strlen(zCopy)];

z = (char *)sqlite3Fts3NextToken(zCopy, &n); if( z==0 ){ assert( n==0 ); z = zCopy; } z[n] = ‘\0’; sqlite3Fts3Dequote(z);

m = (sqlite3_tokenizer_module )sqlite3Fts3HashFind(pHash,z,(int)strlen(z)+1); if( !m ){ sqlite3Fts3ErrMsg(pzErr, “unknown tokenizer: %s”, z); rc = SQLITE_ERROR; }else{ char const **aArg = 0; int iArg = 0; z = &z[n+1]; while( zxCreate(iArg, aArg, ppTok); assert( rc!=SQLITE_OK || ppTok ); if( rc!=SQLITE_OK ){ sqlite3Fts3ErrMsg(pzErr, “unknown tokenizer”); }else{ (ppTok)->pModule = m; } sqlite3_free((void )aArg); }

sqlite3_free(zCopy); return rc; }

## testFunc()

`test`と名のつくものは無視していいかもしれない。

/*
** SQL function implementation used for testing tokenizers.
**
**   argv[0]          name of the tokenizer to look up in the hash table
**                    supplied as user data
**   argv[1..argc-2]  arguments handed to the tokenizer's xCreate()
**   argv[argc-1]     text to tokenize
**
** For every token, three elements are appended to a Tcl list: the token
** position, the normalized token text, and the matching slice of the
** input. The string form of that list is the result of the SQL function.
**
** NOTE(review): azArg[] has 64 slots and the copy loop is not bounded by
** that size - confirm that argc cannot exceed 66 here.
** NOTE(review): the "error in xCreate()" / "error in xOpen()" paths jump
** to finish without destroying objects that were already created.
*/
static void testFunc(
  sqlite3_context *context,
  int argc,
  sqlite3_value **argv
){
  Fts3Hash *pHash;
  sqlite3_tokenizer_module *p;
  sqlite3_tokenizer *pTokenizer = 0;
  sqlite3_tokenizer_cursor *pCsr = 0;

  const char *zErr = 0;

  const char *zName;
  int nName;
  const char *zInput;
  int nInput;

  const char *azArg[64];

  const char *zToken;
  int nToken = 0;
  int iStart = 0;
  int iEnd = 0;
  int iPos = 0;
  int i;

  Tcl_Obj *pRet;

  if( argc<2 ){
    sqlite3_result_error(context, "insufficient arguments", -1);
    return;
  }

  nName = sqlite3_value_bytes(argv[0]);
  zName = (const char *)sqlite3_value_text(argv[0]);
  nInput = sqlite3_value_bytes(argv[argc-1]);
  zInput = (const char *)sqlite3_value_text(argv[argc-1]);

  pHash = (Fts3Hash *)sqlite3_user_data(context);
  p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);

  if( !p ){
    char *zErr2 = sqlite3_mprintf("unknown tokenizer: %s", zName);
    sqlite3_result_error(context, zErr2, -1);
    sqlite3_free(zErr2);
    return;
  }

  pRet = Tcl_NewObj();
  Tcl_IncrRefCount(pRet);

  /* Everything between the name and the input text is a tokenizer arg */
  for(i=1; i<argc-1; i++){
    azArg[i-1] = (const char *)sqlite3_value_text(argv[i]);
  }

  if( SQLITE_OK!=p->xCreate(argc-2, azArg, &pTokenizer) ){
    zErr = "error in xCreate()";
    goto finish;
  }
  pTokenizer->pModule = p;
  if( sqlite3Fts3OpenTokenizer(pTokenizer, 0, zInput, nInput, &pCsr) ){
    zErr = "error in xOpen()";
    goto finish;
  }

  while( SQLITE_OK==p->xNext(pCsr, &zToken, &nToken, &iStart, &iEnd, &iPos) ){
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewIntObj(iPos));
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
    /* Re-point zToken at the original (un-normalized) input slice */
    zToken = &zInput[iStart];
    nToken = iEnd-iStart;
    Tcl_ListObjAppendElement(0, pRet, Tcl_NewStringObj(zToken, nToken));
  }

  if( SQLITE_OK!=p->xClose(pCsr) ){
    zErr = "error in xClose()";
    goto finish;
  }
  if( SQLITE_OK!=p->xDestroy(pTokenizer) ){
    zErr = "error in xDestroy()";
    goto finish;
  }

finish:
  if( zErr ){
    sqlite3_result_error(context, zErr, -1);
  }else{
    sqlite3_result_text(context, Tcl_GetString(pRet), -1, SQLITE_TRANSIENT);
  }
  Tcl_DecrRefCount(pRet);
}

## registerTokenizer()

トークナイザの登録処理か。

/*
** Register tokenizer module p under the name zName by running
** "SELECT fts3_tokenizer(?, ?)" on db. The second parameter is bound as a
** blob holding the value of pointer p itself (sizeof(p) bytes), not the
** structure it points to.
**
** Returns the sqlite3_prepare_v2() error code if preparing fails,
** otherwise the result of sqlite3_finalize(). The return value of
** sqlite3_step() is not inspected directly.
*/
static int registerTokenizer(
  sqlite3 *db,
  char *zName,
  const sqlite3_tokenizer_module *p
){
  int rc;
  sqlite3_stmt *pStmt;
  const char zSql[] = "SELECT fts3_tokenizer(?, ?)";

  rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
  if( rc!=SQLITE_OK ){
    return rc;
  }

  sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
  sqlite3_bind_blob(pStmt, 2, &p, sizeof(p), SQLITE_STATIC);
  sqlite3_step(pStmt);

  return sqlite3_finalize(pStmt);
}

## queryTokenizer()

select文を発行している？

/*
** Look up the tokenizer module registered under zName by running
** "SELECT fts3_tokenizer(?)" on db.
**
** *pp is first set to 0. If the statement yields a row whose first column
** is a BLOB, the first sizeof(*pp) bytes of that blob are copied into *pp,
** i.e. the blob is treated as holding the module pointer.
**
** Returns the sqlite3_prepare_v2() error code if preparing fails,
** otherwise the result of sqlite3_finalize(). An SQLITE_OK return can
** therefore still leave *pp set to 0.
*/
static int queryTokenizer(
  sqlite3 *db,
  char *zName,
  const sqlite3_tokenizer_module **pp
){
  int rc;
  sqlite3_stmt *pStmt;
  const char zSql[] = "SELECT fts3_tokenizer(?)";

  *pp = 0;
  rc = sqlite3_prepare_v2(db, zSql, -1, &pStmt, 0);
  if( rc!=SQLITE_OK ){
    return rc;
  }

  sqlite3_bind_text(pStmt, 1, zName, -1, SQLITE_STATIC);
  if( SQLITE_ROW==sqlite3_step(pStmt) ){
    if( sqlite3_column_type(pStmt, 0)==SQLITE_BLOB ){
      memcpy((void *)pp, sqlite3_column_blob(pStmt, 0), sizeof(*pp));
    }
  }

  return sqlite3_finalize(pStmt);
}

## sqlite3Fts3SimpleTokenizerModule()

SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule(sqlite3_tokenizer_module const**ppModule);

プロトタイプ宣言。

## sqlite3_tokenizer_module構造体のインスタンス

/*
** Method table of the simple tokenizer. The initializers are positional,
** in the member order of struct sqlite3_tokenizer_module.
*/
static const sqlite3_tokenizer_module simpleTokenizerModule = {
  0,                /* iVersion */
  simpleCreate,     /* xCreate */
  simpleDestroy,    /* xDestroy */
  simpleOpen,       /* xOpen */
  simpleClose,      /* xClose */
  simpleNext,       /* xNext */
  0                 /* xLanguageid */
};

トークナイザの1つ。シンプルトークナイザ。

## sqlite3Fts3SimpleTokenizerModule()

/*
** Store the address of the simple tokenizer's method table in *ppModule.
** The signature matches the prototype, which takes a pointer to a pointer.
*/
SQLITE_PRIVATE void sqlite3Fts3SimpleTokenizerModule(
  sqlite3_tokenizer_module const**ppModule    /* OUT: simple module */
){
  *ppModule = &simpleTokenizerModule;
}

## Fts3tokTable構造体

/ ** Virtual table structure. / struct Fts3tokTable { sqlite3_vtab base; / Base class used by SQLite core / const sqlite3_tokenizer_module pMod; sqlite3_tokenizer pTok; };

これが`CREATE VIRTUAL TABLE`文の実装に関する構造体なのだろうか。

## fts3tokQueryTokenizer()

VIRTUAL TABLEに対して検索する？

/*
** Look up the tokenizer module named zName in hash table pHash. The key
** used for the lookup includes the nul-terminator (strlen(zName)+1 bytes).
**
** On success *pp is set to the module and SQLITE_OK is returned. If no
** such entry exists, an "unknown tokenizer" message is stored through
** pzErr with sqlite3Fts3ErrMsg() and SQLITE_ERROR is returned. *pp is
** left untouched in that case.
*/
static int fts3tokQueryTokenizer(
  Fts3Hash *pHash,                        /* Hash table of tokenizers */
  const char *zName,                      /* Name of tokenizer to find */
  const sqlite3_tokenizer_module **pp,    /* OUT: The module */
  char **pzErr                            /* OUT: Error message */
){
  sqlite3_tokenizer_module *p;
  int nName = (int)strlen(zName);

  p = (sqlite3_tokenizer_module *)sqlite3Fts3HashFind(pHash, zName, nName+1);
  if( !p ){
    sqlite3Fts3ErrMsg(pzErr, "unknown tokenizer: %s", zName);
    return SQLITE_ERROR;
  }

  *pp = p;
  return SQLITE_OK;
}

## fts3tokConnectMethod()

VIRTUAL TABLEを作っている？

static int fts3tokConnectMethod( sqlite3 db, / Database connection / void pHash, / Hash table of tokenizers / int argc, / Number of elements in argv array / const char * const argv, / xCreate/xConnect argument array / sqlite3_vtab **ppVtab, / OUT: New sqlite3_vtab object / char **pzErr / OUT: sqlite3_malloc’d error message / ){ Fts3tokTable pTab = 0; const sqlite3_tokenizer_module pMod = 0; sqlite3_tokenizer pTok = 0; int rc; char **azDequote = 0; int nDequote;

rc = sqlite3_declare_vtab(db, FTS3_TOK_SCHEMA); if( rc!=SQLITE_OK ) return rc;

nDequote = argc-3; rc = fts3tokDequoteArray(nDequote, &argv[3], &azDequote);

if( rc==SQLITE_OK ){ const char zModule; if( nDequote<1 ){ zModule = “simple”; }else{ zModule = azDequote[0]; } rc = fts3tokQueryTokenizer*3; if( pTab==0 ){ rc = SQLITE_NOMEM; } }

if( rc==SQLITE_OK ){ memset(pTab, 0, sizeof(Fts3tokTable)); pTab->pMod = pMod; pTab->pTok = pTok; *ppVtab = &pTab->base; }else{ if( pTok ){ pMod->xDestroy(pTok); } }

sqlite3_free(azDequote); return rc; }

# 所感

難しすぎる。なんとなく眺めてコードに慣れればそれでいいや。

*1:void )pp, sqlite3_column_blob(pStmt, 0), sizeof(pp

*2:void )pp, sqlite3_column_blob(pStmt, 0), sizeof(pp

*3:Fts3Hash)pHash, zModule, &pMod, pzErr); }

assert( (rc==SQLITE_OK)==(pMod!=0) ); if( rc==SQLITE_OK ){ const char * const azArg = (const char * const )&azDequote[1]; rc = pMod->xCreate((nDequote>1 ? nDequote-1 : 0), azArg, &pTok); }

if( rc==SQLITE_OK ){ pTab = (Fts3tokTable *)sqlite3_malloc(sizeof(Fts3tokTable

やってみる

アウトプットすべく己を導くためのブログ。その試行錯誤すらたれ流す。

SQLite3のソースコードのうちFTS3の部分を読んでみる

コードを見る

sqlite3_tokenizer_module 構造体

トークナイザ関数名(FTS3)

FTSバージョン

sqlite3Fts3Init()