天天看点

sphinx 采用c扩展xmlpipe2数据源

我要采用 sphinx 作为全文检索引擎,但很多数据都以文件形式存放在硬盘上。如果把这些数据全部读入数据库就太痛苦了:数据量很大,而用 mysql 存储这些数据仅仅是为了全文检索,也过于浪费。所以决定采用 xmlpipe2 数据源。由于 c 语言的 io 操作能力很强,为了效率,就用了我以前从没写过的 c 语言。废话少说,代码如下:

#include <stdio.h>

#include <stdlib.h>

#include <string.h>

#include <unistd.h>

#include <mysql.h>

#include <iconv.h>

#include "dictionary.h"

#include "iniparser.h"

// Name of the INI configuration file loaded at startup

#define INI_FILE_NAME "sanshi_xmlpipe.ini"

// Separator between multiple SQL statements stored in one setting

#define SQL_SIGN "|"

// Separator between a field's name and its value inside a field descriptor

#define ATTRIBUTE_SIGN ":"

// Separator between multiple field descriptors
// (the name looks like a typo of FIELD_SIGN; it is kept because other code references it)

#define DIELD_SIGN ","

// Buffer length used when reading a file line by line

#define READFILE_MAX_LEN 1024

// Maximum length of a directory path plus file name

#define FILE_NAME_MAX_LEN 1024

/*
 * Settings taken from the INI file.
 *
 * Every member is a plain C string pointer.  The program first stores the
 * INI lookup key ("section:key") in each member and then replaces it with
 * the value returned by iniparser for that key.
 */
typedef struct tag_sanshi_ini_config {
    /* [db] section: MySQL connection parameters */
    char *db_name;            /* database to select on connect */
    char *db_user;            /* MySQL user name */
    char *db_pwd;             /* MySQL password */
    char *db_host;            /* MySQL server host */

    /* [sql] section */
    char *sql_query_pre;      /* statements run before the main query, separated by SQL_SIGN */
    char *sql_query;          /* main query whose rows become documents */
    char *sql_query_post;     /* setting read from sql:sql_query_post */

    /* [xml] section */
    char *sphinx_schema;      /* schema text printed right after the docset opening tag */
    char *sphinx_id;          /* key of the setting holding the document-id column index */
    char *sphinx_file;        /* "name:index" descriptor of the file-backed field */
    char *sphinx_other_field; /* "name:index" descriptors separated by DIELD_SIGN */

    /* [file] section */
    char *file_dir;           /* base directory setting; defaults to "./" when absent */
} sanshi_ini_config;

sanshi_ini_config conf={

"db:db_name",

"db:db_user",

"db:db_pwd",

"db:db_host",

"sql:sql_query_pre",

"sql:sql_query",

"sql:sql_query_post",

"xml:schema",

"xml:index_id",

"xml:file_field",

"xml:other_field",

"file:base_dir"

};

/*
 * One parsed "name:index" field descriptor.
 */
typedef struct tag_sanshi_field {
    char *field; /* field name (text before ATTRIBUTE_SIGN) */
    int   id;    /* integer parsed from the text after ATTRIBUTE_SIGN */
} sanshi_field;

/*
 * Split a "name<ATTRIBUTE_SIGN>index" descriptor in place.
 *
 * field_str is modified: the first occurrence of ATTRIBUTE_SIGN is
 * overwritten with a NUL terminator so that field_str itself becomes the
 * field name.  The caller keeps ownership of the buffer, which must be
 * writable and must outlive the returned structure.
 *
 * Returns a sanshi_field whose `field` points at field_str and whose `id`
 * is the decimal integer that followed the separator (as parsed by atoi).
 * When field_str is NULL or contains no separator, nothing is modified and
 * the result is { field_str, 0 }.
 */
sanshi_field parser_field(char * field_str)
{
    sanshi_field parsed;
    char *sep;

    parsed.field = field_str;
    parsed.id = 0;

    if (field_str == NULL) {
        return parsed;
    }

    sep = strstr(field_str, ATTRIBUTE_SIGN);
    if (sep == NULL) {
        /* No separator: treat the whole string as the name. */
        return parsed;
    }

    /* Read the index before the separator is overwritten. */
    parsed.id = atoi(sep + strlen(ATTRIBUTE_SIGN));

    /* Terminate the name; this must be the NUL character '\0'. */
    *sep = '\0';

    return parsed;
}

/*
 * Copy the contents of a text file to standard output.
 *
 * The file is read in chunks of at most READFILE_MAX_LEN - 1 characters and
 * each chunk is written out verbatim.  If file_name is NULL or the file
 * cannot be opened, nothing is printed.
 */
void print_file_content(char * file_name)
{
    char line[READFILE_MAX_LEN];
    FILE *fp;

    if (file_name == NULL) {
        return;
    }

    fp = fopen(file_name, "r");
    if (fp == NULL) {
        return;
    }

    while (fgets(line, sizeof line, fp) != NULL) {
        /*
         * Write the data as-is.  It must never be used as a printf format
         * string: a '%' in the file would be interpreted as a conversion.
         */
        fputs(line, stdout);
    }

    fclose(fp);
}

/*
 * Run every statement contained in sql_str on the given connection.
 *
 * sql_str holds one or more SQL statements separated by SQL_SIGN.  It is
 * tokenized in place with strtok(), so the buffer is modified and must be
 * writable.  A NULL sql_str means "nothing to run" and returns immediately.
 *
 * Any result set produced by a statement is fetched and discarded.
 *
 * On the first failing statement the error is reported on stderr (stdout
 * carries the XML stream), the connection is closed and the process exits
 * with EXIT_FAILURE.
 */
void exec_mysql_query(MYSQL * mysql_con,char * sql_str)
{
    char *statement;

    if (sql_str == NULL) {
        return;
    }

    for (statement = strtok(sql_str, SQL_SIGN);
         statement != NULL;
         statement = strtok(NULL, SQL_SIGN)) {
        MYSQL_RES *discarded;

        if (mysql_query(mysql_con, statement) != 0) {
            fprintf(stderr, "ERROR sql=%s \nERROR NO=%u \nERROR msg= %s \n",
                    statement, mysql_errno(mysql_con), mysql_error(mysql_con));
            mysql_close(mysql_con);
            exit(EXIT_FAILURE);
        }

        /* Drain the result set so the connection is ready for the next statement. */
        discarded = mysql_store_result(mysql_con);
        if (discarded != NULL) {
            mysql_free_result(discarded);
        }
    }
}

int main(int argc,char * argv[])

{

dictionary * ini;

MYSQL mysql,*mysql_con;

MYSQL_RES *result;

MYSQL_ROW row;

int query_error_no,sphinx_id;

sanshi_field file_field;

ini = iniparser_load(INI_FILE_NAME);

//get ini config mysql set

conf.db_name = iniparser_getstring(ini,conf.db_name,"test");

conf.db_user = iniparser_getstring(ini,conf.db_user,"root");

conf.db_pwd = iniparser_getstring(ini,conf.db_pwd,"");

conf.db_host = iniparser_getstring(ini,conf.db_host,"localhost");

conf.sql_query_pre = iniparser_getstring(ini,conf.sql_query_pre,NULL);

conf.sql_query = iniparser_getstring(ini,conf.sql_query,NULL);

conf.sql_query_post = iniparser_getstring(ini,conf.sql_query_post,NULL);

conf.sphinx_schema = iniparser_getstring(ini,conf.sphinx_schema,NULL);

sphinx_id = iniparser_getint(ini,conf.sphinx_id,0);

conf.sphinx_file = iniparser_getstring(ini,conf.sphinx_file,NULL);

file_field = parser_field(conf.sphinx_file);

conf.sphinx_other_field = iniparser_getstring(ini,conf.sphinx_other_field,NULL);

conf.file_dir = iniparser_getstring(ini,conf.file_dir,"./");

//printf("db_name=%s /t db_user=%s /t db_pwd=%s /t db_host=%s /n",conf.db_name,conf.db_user,conf.db_pwd,conf.db_host);

//mysql connect

mysql_init(&mysql);

mysql_con =mysql_real_connect(&mysql,conf.db_host,conf.db_user,conf.db_pwd,conf.db_name,0,NULL,0);

if(mysql_con == NULL)

{

printf("ERROR: connect mysql fail! plaese check ini file in set/n %s /n",mysql_error(&mysql));

exit(0);

}

//printf("mysql connect suc!/n");

//exec sql

exec_mysql_query(mysql_con,conf.sql_query_pre);

query_error_no = mysql_query(mysql_con,conf.sql_query);

if(query_error_no !=0)

{

printf("ERROR sql=%s /nERROR NO=%d /nERROR msg= %s /n",conf.sql_query,query_error_no,mysql_error(mysql_con));

mysql_close(mysql_con);

exit(0);

}

//printf("exec sql_query : %s /n",conf.sql_query);

result = mysql_store_result(mysql_con);

//echo xml header

printf("<?xml version=/"1.0/" encodeing=/"utf-8/"?>/n<sphinx:docset>/n%s/n",conf.sphinx_schema);

//printf("%s/n",conf.sphinx_other_field);

while(row = mysql_fetch_row(result))

{

printf("<sphinx:document id=/"%d/">/n",(row[sphinx_id]?row[sphinx_id]:0));

char * field_str;

char field_tmp[strlen(conf.sphinx_other_field)+1];

char temp_file_name[FILE_NAME_MAX_LEN];

memcpy(field_tmp,conf.sphinx_other_field,strlen(conf.sphinx_other_field)+1);

field_str = strtok( field_tmp, DIELD_SIGN);

while( field_str != NULL )

{

//printf("%s/n",field_str);

sanshi_field other_field = parser_field(field_str);

printf("

本人第一次写c,有些地方优化不够,还望各位指点

配置文件如下:

[db]

db_host=127.0.0.1

db_name=test

db_pwd=123456

db_user=root

[sql]

sql_query_pre=select * from log|select * from log

sql_query = select * from log

sql_query_post =

[xml]

schema=\

<sphinx:schema>\

<sphinx:field name="LogActionType"/>\

<sphinx:field name="LogDataType"/>\

<sphinx:attr name="LogTime" type="timestamp"/>\

<sphinx:attr name="LogIP" type="int" bits="16" default="1"/>\

</sphinx:schema>

index_id=0

file_field=LogActionType:1

other_field=LogDataType:2,LogTime:6,LogIP:4

[file]

base_dir=

备注:该程序的ini解析用到的是iniparser3.0b 的源代码,也就是依赖的2个h文件

#include "dictionary.h"

#include "iniparser.h"

       编译的命令为:

gcc -I /data/app/mysql/include/mysql/ -L /data/app/mysql/lib/mysql/ -l mysqlclient -g -o sanshi sanshi_xmlpipe.c dictionary.c iniparser.c

注意mysql的库路径,以及文件名