天天看點

sphinx 采用c擴充xmlpipe2資料源

我要采用 sphinx 作為全文檢索引擎,但是很多資料都存在硬碟上的檔案裡。資料量很大,如果把這些資料都讀入資料庫那就太痛苦了;而且用 mysql 資料庫存儲這些資料只是為了全文檢索,也過于浪費,所以決定采用 xmlpipe2 資料源。由于 c 語言的 io 操作能力很強,為了效率,就用了我從沒寫過的 c 語言。廢話少說,代碼如下:

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <unistd.h>

#include <mysql.h>

#include <iconv.h>

#include "dictionary.h"

#include "iniparser.h"

// Name of the INI configuration file loaded at startup.

#define INI_FILE_NAME "sanshi_xmlpipe.ini"

// Separator between multiple SQL statements stored in one INI value.

#define SQL_SIGN "|"

// Separator between a field's name and its value inside one field spec
// (e.g. "LogDataType:2").

#define ATTRIBUTE_SIGN ":"

// Separator between multiple field specs in one INI value.
// NOTE(review): the name looks like a typo for FIELD_SIGN; it is kept because
// other code refers to it by this name.

#define DIELD_SIGN ","

// Size of the buffer used when reading a file line by line.

#define READFILE_MAX_LEN 1024

// Maximum length of a directory path plus file name.

#define FILE_NAME_MAX_LEN 1024

/*
 * Settings read from the INI file.
 *
 * Each member starts out holding the "section:key" name used to look the
 * setting up and is later overwritten with the value fetched from the INI
 * dictionary. Member order matters: the global instance is initialised
 * positionally.
 */
typedef struct tag_sanshi_ini_config {
    /* MySQL connection parameters. */
    char *db_name;
    char *db_user;
    char *db_pwd;
    char *db_host;

    /* SQL to run before the main query, the main query, and the post query. */
    char *sql_query_pre;
    char *sql_query;
    char *sql_query_post;

    /* xmlpipe2 output description. */
    char *sphinx_schema;       /* schema text printed after the XML header */
    char *sphinx_id;           /* INI key of the result column used as document id */
    char *sphinx_file;         /* "name:column" spec of the file-backed field */
    char *sphinx_other_field;  /* comma-separated "name:column" specs */

    /* Base directory setting for files. */
    char *file_dir;
} sanshi_ini_config;

sanshi_ini_config conf={

"db:db_name",

"db:db_user",

"db:db_pwd",

"db:db_host",

"sql:sql_query_pre",

"sql:sql_query",

"sql:sql_query_post",

"xml:schema",

"xml:index_id",

"xml:file_field",

"xml:other_field",

"file:base_dir"

};

/*
 * One parsed "name:number" field specification.
 */
typedef struct tag_sanshi_field {
    char *field;  /* field name; points into the string that was parsed */
    int id;       /* integer that followed the separator */
} sanshi_field;

/*
 * Split a "name<ATTRIBUTE_SIGN>number" specification in place.
 *
 * field_str: writable, NUL-terminated string such as "LogDataType:2".
 *            The first separator is overwritten with '\0', so on return
 *            field_str itself reads as just the name.
 *
 * Returns a sanshi_field whose `field` points at field_str (no copy is
 * made) and whose `id` is the integer after the separator.
 * If field_str is NULL the result is { NULL, 0 }.
 * If no separator is present the whole string is the name and id is 0.
 */
sanshi_field parser_field(char * field_str)
{
    sanshi_field parsed = { NULL, 0 };
    char *sep;

    if (field_str == NULL)
    {
        return parsed;
    }

    parsed.field = field_str;

    sep = strstr(field_str, ATTRIBUTE_SIGN);
    if (sep == NULL)
    {
        /* No separator: nothing to convert and nothing to cut. */
        return parsed;
    }

    /* Read the number first, then terminate the name at the separator. */
    parsed.id = atoi(sep + strlen(ATTRIBUTE_SIGN));
    *sep = '\0';

    return parsed;
}

/*
 * Copy the contents of a text file to standard output.
 *
 * file_name: path of the file to read. If the file cannot be opened the
 *            function does nothing.
 *
 * The file is read in chunks of at most READFILE_MAX_LEN - 1 characters.
 */
void print_file_content(char * file_name)
{
    FILE *fp;
    char line[READFILE_MAX_LEN];

    fp = fopen(file_name, "r");
    if (fp == NULL)
    {
        return;
    }

    while (fgets(line, sizeof line, fp) != NULL)
    {
        /* Write the text verbatim; file content must never be used as a
           printf format string, since '%' sequences would be interpreted. */
        fputs(line, stdout);
    }

    fclose(fp);
}

/*
 * Run every statement in a SQL_SIGN-separated list, discarding results.
 *
 * mysql_con: open MySQL connection.
 * sql_str:   writable string holding one or more statements separated by
 *            SQL_SIGN; it is tokenised in place with strtok. NULL means
 *            "nothing to run" and returns immediately.
 *
 * On the first failing statement the error is reported on stderr (stdout
 * carries the XML stream), the connection is closed and the process exits
 * with EXIT_FAILURE.
 */
void exec_mysql_query(MYSQL * mysql_con,char * sql_str)
{
    char *token;

    if (sql_str == NULL)
    {
        return;
    }

    for (token = strtok(sql_str, SQL_SIGN);
         token != NULL;
         token = strtok(NULL, SQL_SIGN))
    {
        MYSQL_RES *res;
        int query_error_no = mysql_query(mysql_con, token);

        if (query_error_no != 0)
        {
            fprintf(stderr, "ERROR sql=%s \nERROR NO=%d \nERROR msg= %s \n",
                    token, query_error_no, mysql_error(mysql_con));
            mysql_close(mysql_con);
            exit(EXIT_FAILURE);
        }

        /* Drain any result set so the connection is ready for the next
           statement; statements without a result set yield NULL. */
        res = mysql_store_result(mysql_con);
        if (res != NULL)
        {
            mysql_free_result(res);
        }
    }
}

int main(int argc,char * argv[])

{

dictionary * ini;

MYSQL mysql,*mysql_con;

MYSQL_RES *result;

MYSQL_ROW row;

int query_error_no,sphinx_id;

sanshi_field file_field;

ini = iniparser_load(INI_FILE_NAME);

//get ini config mysql set

conf.db_name = iniparser_getstring(ini,conf.db_name,"test");

conf.db_user = iniparser_getstring(ini,conf.db_user,"root");

conf.db_pwd = iniparser_getstring(ini,conf.db_pwd,"");

conf.db_host = iniparser_getstring(ini,conf.db_host,"localhost");

conf.sql_query_pre = iniparser_getstring(ini,conf.sql_query_pre,NULL);

conf.sql_query = iniparser_getstring(ini,conf.sql_query,NULL);

conf.sql_query_post = iniparser_getstring(ini,conf.sql_query_post,NULL);

conf.sphinx_schema = iniparser_getstring(ini,conf.sphinx_schema,NULL);

sphinx_id = iniparser_getint(ini,conf.sphinx_id,0);

conf.sphinx_file = iniparser_getstring(ini,conf.sphinx_file,NULL);

file_field = parser_field(conf.sphinx_file);

conf.sphinx_other_field = iniparser_getstring(ini,conf.sphinx_other_field,NULL);

conf.file_dir = iniparser_getstring(ini,conf.file_dir,"./");

//printf("db_name=%s /t db_user=%s /t db_pwd=%s /t db_host=%s /n",conf.db_name,conf.db_user,conf.db_pwd,conf.db_host);

//mysql connect

mysql_init(&mysql);

mysql_con =mysql_real_connect(&mysql,conf.db_host,conf.db_user,conf.db_pwd,conf.db_name,0,NULL,0);

if(mysql_con == NULL)

{

printf("ERROR: connect mysql fail! plaese check ini file in set/n %s /n",mysql_error(&mysql));

exit(0);

}

//printf("mysql connect suc!/n");

//exec sql

exec_mysql_query(mysql_con,conf.sql_query_pre);

query_error_no = mysql_query(mysql_con,conf.sql_query);

if(query_error_no !=0)

{

printf("ERROR sql=%s /nERROR NO=%d /nERROR msg= %s /n",conf.sql_query,query_error_no,mysql_error(mysql_con));

mysql_close(mysql_con);

exit(0);

}

//printf("exec sql_query : %s /n",conf.sql_query);

result = mysql_store_result(mysql_con);

//echo xml header

printf("<?xml version=/"1.0/" encodeing=/"utf-8/"?>/n<sphinx:docset>/n%s/n",conf.sphinx_schema);

//printf("%s/n",conf.sphinx_other_field);

while(row = mysql_fetch_row(result))

{

printf("<sphinx:document id=/"%d/">/n",(row[sphinx_id]?row[sphinx_id]:0));

char * field_str;

char field_tmp[strlen(conf.sphinx_other_field)+1];

char temp_file_name[FILE_NAME_MAX_LEN];

memcpy(field_tmp,conf.sphinx_other_field,strlen(conf.sphinx_other_field)+1);

field_str = strtok( field_tmp, DIELD_SIGN);

while( field_str != NULL )

{

//printf("%s/n",field_str);

sanshi_field other_field = parser_field(field_str);

printf("

本人第一次寫c,有些地方優化不夠,還望各位指點

配置檔案如下:

[db]

db_host=127.0.0.1

db_name=test

db_pwd=123456

db_user=root

[sql]

sql_query_pre=select * from log|select * from log

sql_query = select * from log

sql_query_post =

[xml]

schema=\

<sphinx:schema>\

<sphinx:field name="LogActionType"/>\

<sphinx:field name="LogDataType"/>\

<sphinx:attr name="LogTime" type="timestamp"/>\

<sphinx:attr name="LogIP" type="int" bits="16" default="1"/>\

</sphinx:schema>

index_id=0

file_field=LogActionType:1

other_field=LogDataType:2,LogTime:6,LogIP:4

[file]

base_dir=

備注:該程式的ini解析用到的是iniparser3.0b 的源代碼,也就是依賴的2個h檔案

#include "dictionary.h"

#include "iniparser.h"

       編譯的指令為:

gcc -I /data/app/mysql/include/mysql/ -L /data/app/mysql/lib/mysql/ -l mysqlclient -g -o sanshi sanshi_xmlpipe.c dictionary.c iniparser.c

注意mysql的庫路徑,以及檔案名