天天看点

用expat解析XML文件小结

参考:http://blog.csdn.net/sinat_29830917/article/details/70241786

    http://blog.csdn.net/sunmoon631/article/details/6438617

首先贴上代码:

#include <stdio.h>
#include <string.h>
#include <stdlib.h>
#include "expat/expat.h"

#include <sys/types.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>

/* Path of the XML document that main() reads and parses. */
#define FILE_PATH "test.xml"
/* Reads file_name into a malloc'ed buffer (*buff) and stores its size in *len;
   returns 0 on success. */
int read_xml_file(const char *file_name,char **buff,int *len);
/* Creates an expat parser, registers `data` as its user data and installs the
   three handlers declared below; returns NULL on failure. */
struct XML_ParserStruct * expat_xml_init(char *data);
/* Start-tag handler: prints the element name and its attribute pairs. */
void start_func(void *userData, const XML_Char *name, const XML_Char **atts);
/* End-tag handler: prints the element name. */
void end_func(void *userData, const XML_Char *name);
/* Character-data handler: prints the `len` bytes of text starting at `s`. */
void deal_data(void *userData, const XML_Char *s, int len);

int main(int argc,char * argv[])
{
        struct XML_ParserStruct * parser;
        int fd,len,isFinal = 0;
        char *buff;
        char data[500];

        read_xml_file(FILE_PATH,&buff,&len);
        if(buff == NULL){
                printf("read xml file is fail! \n");
                exit -1;
        }
        printf("buff is = %s \n",buff);

        parser = expat_xml_init(data);
        if(parser == NULL){
                perror("expat xml init fail!\n");
                exit -1;
        }
        if(XML_Parse(parser, buff, len,isFinal) == XML_STATUS_ERROR){
                fprintf(stderr,
                        "%s at line %lu\n",
                        XML_ErrorString(XML_GetErrorCode(parser)),
                        XML_GetCurrentLineNumber(parser));
                printf("XML_Parse error : %s \n",(char *)stderr);
        }
        free(buff);
        return 0;
}
/****************************************************************
N A M E:read_xml_file

F U N C:open FILENAME and read its whole content into a newly
        allocated buffer.
        param 2 (*buff) receives the buffer; it is always
        NUL-terminated and must be released with free() by the
        caller.
        param 3 (*len) receives the number of bytes read (the
        terminating NUL is not counted).
        On failure *buff is set to NULL and *len to 0.

RETURN:0,SUCCESS
       -1,FAIL (open/fstat/malloc/read error, or the file size
          does not fit in an int)
****************************************************************/
int read_xml_file(const char *FILENAME,char **buff,int * len)
{
        struct stat st;
        char *data = NULL;
        size_t total = 0;
        ssize_t got;
        int size;
        int fd;

        /* Give the caller well-defined outputs on every failure path. */
        *buff = NULL;
        *len = 0;

        fd = open(FILENAME, O_RDONLY);
        if(fd < 0){
                perror("open");
                return -1;
        }
        if(fstat(fd, &st) < 0){
                perror("fstat");
                goto fail;
        }
        /* The interface reports the length as int: reject sizes that would
           be truncated by the conversion. */
        size = (int)st.st_size;
        if(size < 0 || (off_t)size != st.st_size){
                fprintf(stderr,"%s: file too large\n",FILENAME);
                goto fail;
        }
        data = malloc((size_t)size + 1);
        if(data == NULL){
                perror("malloc");
                goto fail;
        }
        /* read() may return fewer bytes than requested; loop until the
           whole file is in memory or end-of-file is reached. */
        while(total < (size_t)size){
                got = read(fd, data + total, (size_t)size - total);
                if(got < 0){
                        perror("read");
                        goto fail;
                }
                if(got == 0)
                        break;
                total += (size_t)got;
        }
        data[total] = '\0';
        close(fd);

        *buff = data;
        *len = (int)total;
        return 0;

fail:
        free(data);
        close(fd);
        return -1;
}
/******************************************************************
N A M E:expat_xml_init

F U N C:create an expat parser (default encoding) and wire it up:
        - `data` is registered as the parser's user data
        - start_func / end_func become the element handlers
        - deal_data becomes the character-data handler

RETURN :the new parser, or NULL if XML_ParserCreate fails
******************************************************************/
struct XML_ParserStruct * expat_xml_init(char *data)
{
        struct XML_ParserStruct *created = XML_ParserCreate(NULL);

        if(!created){
                perror("XML_ParserCreate");
                return NULL;
        }

        /* Install the callbacks first, then attach the caller's context;
           the calls are independent of each other. */
        XML_SetCharacterDataHandler(created, deal_data);
        XML_SetElementHandler(created,start_func,end_func);
        XML_SetUserData(created, data);

        return created;
}
/******************************************************************
N A M E:start_func

F U N C:start-tag callback registered via XML_SetElementHandler.
        Prints the tag name, e.g. for <end ...> name = "end",
        then every attribute as "key = value". `atts` is walked
        two entries at a time (key, value) until a NULL key is
        found.
        userData is not used.

RETURN :void
******************************************************************/
void start_func(void *userData, const XML_Char *name, const XML_Char **atts)
{
        const XML_Char **pair = atts;

        (void)userData;

        printf("start name = %s \n",(char *)name);
        while(*pair != NULL){
                printf("%s = %s \n",(char *)pair[0],(char *)pair[1]);
                pair += 2;
        }
        printf("start name over!\n");
}
/******************************************************************
N A M E:end_func

F U N C:end-tag callback registered via XML_SetElementHandler.
        Prints the name of the element being closed,
        e.g. for </end> name = "end".
        userData is not used.

RETURN :void
******************************************************************/
void end_func(void *userData, const XML_Char *name)
{
        const char *closed = (const char *)name;

        (void)userData;
        printf("end name = %s \n",closed);
}
/******************************************************************
N A M E:deal_data

F U N C:character-data callback registered via
        XML_SetCharacterDataHandler.
        Prints the `len` bytes of text starting at `s`.
        `s` is NOT NUL-terminated: it points into the parser's
        input, so it must never be printed with a plain "%s"
        (that would run past the text into the following markup
        or past the end of the buffer). The length-bounded
        "%.*s" is used instead, which also removes the need for
        a temporary copy.
        userData is not used.

RETURN :void
******************************************************************/
void deal_data(void *userData, const XML_Char *s, int len)
{
        (void)userData;

        printf("deal data!\n");
        if(s != NULL && len > 0){
                printf("we need data = %.*s\n",len,(const char *)s);
        }
        printf("deal data over! \n");
}
                                             
           

再贴上test.xml文件的代码:

<feed version="2.0" ctxt-id="9212" template-id="default" feed-type="ftti">my name is yuzhihuang!<set version="3.0">only test!</set></feed>
           

再贴上编译后运行的结果:

buff is = <feed version="2.0" ctxt-id="9212" template-id="default" feed-type="ftti">my name is yuzhihuang!<set version="3.0">only test!</set></feed>
 
start name = feed 
version = 2.0 
ctxt-id = 9212 
template-id = default 
feed-type = ftti 
start name over!
deal data!
all data = my name is yuzhihuang!<set version="3.0">only test!</set></feed>
 
we need data = my name is yuzhihuang!
deal data over! 
start name = set 
version = 3.0 
start name over!
deal data!
all data = only test!</set></feed>
 
we need data = only test!
deal data over! 
end name = set 
end name = feed 
           

注意:是调用了XML_Parse这个函数才去解析XML文件。

*********************************************摘取自:http://blog.csdn.net/sinat_29830917/article/details/70241786

XML_Parse(XML_Parser parser, const char *s, int len, int isFinal)

       第二个参数是用户指定的Buffer指针, 第三个是这块Buffer中实际内容的字节数,最后参数代表是否这块Buffer已经结束。比如要解析的XML文件太大,但内存比较吃紧,Buffer比较小,则可以循环读取文件,然后丢给Parser,  在文件读取结束前,isFinal参数为FALSE,反之为TRUE。

      这里的Buffer如果太小则会造成上面提到那个隐晦的问题,

      XML_CharacterDataHandler一次返回的可能并不是完整的CharData。比如某段charData的长度大于你的Buffer大小,这时会连续调用2次(甚至多次)XML_CharacterDataHandler,我们需要将各次的结果拼接起来,才能得到正确结果,因此我们的状态机一定要考虑到这点。

****************************************************************结束摘取*******************************************************************

xml文件内容要按xml语法写,不然会解析出错。

一开始test.xml文件内容如下:

<feed version="2.0" ctxt-id="9212" template-id="default" feed-type="ftti">my name is yuzhihuang!</feed>
<set version="3.0">only test!</set>
           

一直出现: junk after document element这样的错误

就是说一个xml文件里面只能有一个主节点,多个主节点就解析不出来啦!

如果主节点中包含了多个子节点,XML_Parse这个函数会在解析过程中循环地调用

start_func(void *userData, const XML_Char *name, const XML_Char **atts)
           

来解析下一个节点的信息。各个节点可以根据节点名称name来区分(见上面的运行结果)!

附上整份代码百度云链接:http://pan.baidu.com/s/1mi5GmXq

继续阅读