天天看点

NLPIR/ICTCLAS 2015 之Java接口使用以及去除词语后面的词性

今天学习了如何在Java项目当中调用NLPIR/ICTCLAS 2015,毕竟张博士的这个分词软件很大一部分用途就是为项目服务的

1.新建一个Java项目,导入JNA的jar包(下面的代码用到了com.sun.jna),我这里jar包所在的位置是 汉语分词20140928\sample\Java\jnaTest\jnaTest

2.将项目所需的NLPIR.dll 以及NLPIR.lib放到项目对应的文件夹中(下面的示例代码是从项目运行目录下的source文件夹加载NLPIR.dll的),还有Data文件夹也要放进去,这边的NLPIR.dll文件注意要使用与系统相对应的版本,我用的是win64

3.新建一个接口,代码可以查看相关文档

/*
 * 
 * @author Harry 
 * */
import com.sun.jna.Library;

/**
 * JNA binding for the functions exported by the NLPIR/ICTCLAS native library.
 *
 * <p>Each method name must match the exported native symbol exactly, which is
 * why the names do not follow Java naming conventions.
 */
public interface CLibrary extends Library {

    /** Initializes the library. The demo in this article treats a return value of 0 as failure. */
    int NLPIR_Init(String sDataPath, int encoding, String sLicenceCode);

    /** Segments the words of a string. */
    String NLPIR_ParagraphProcess(String sSrc, int bPOSTagged);

    /** Segments the content of a TXT file, writing the result to another file. */
    double NLPIR_FileProcess(String sSourceFilename, String sResultFilename, int bPOStagged);

    /** Extracts keywords from a string. */
    String NLPIR_GetKeyWords(String sLine, int nMaxKeyLimit, boolean bWeightOut);

    /**
     * Extracts keywords from a TXT file.
     * NOTE(review): despite its name, {@code sLine} appears to be the file path here
     * (the demo passes a path) -- confirm against the NLPIR documentation.
     */
    String NLPIR_GetFileKeyWords(String sLine, int nMaxKeyLimit, boolean bWeightOut);

    /** Adds a single entry to the user dictionary. */
    int NLPIR_AddUserWord(String sWord);

    /** Deletes a single entry from the user dictionary. */
    int NLPIR_DelUsrWord(String sWord);

    /** Imports a user dictionary from a TXT file. */
    int NLPIR_ImportUserDict(String sFilename);

    /** Saves the user dictionary to disk. */
    int NLPIR_SaveTheUsrDic();

    /** Finds new words in a string. */
    String NLPIR_GetNewWords(String sLine, int nMaxKeyLimit, boolean bWeightOut);

    /** Finds new words in a TXT file. */
    String NLPIR_GetFileNewWords(String sTextFile, int nMaxKeyLimit, boolean bWeightOut);

    /** Returns the fingerprint value of a string. */
    long NLPIR_FingerPrint(String sLine);

    /** Selects the POS map to use. */
    int NLPIR_SetPOSmap(int nPOSmap);

    /** Returns the last error message recorded by the library. */
    String NLPIR_GetLastErrorMsg();

    /** Shuts the library down. */
    void NLPIR_Exit();
}
           

然后再新建class开始直接使用

import java.math.BigDecimal;
import com.sun.jna.Native;

/**
 * Walk-through of the NLPIR/ICTCLAS calls exposed by {@code CLibrary}: segmentation,
 * user-dictionary editing, keyword and new-word extraction, and file processing.
 */
public class Demo {

    /**
     * Loads the native library, initializes it and runs every sample call in turn,
     * printing each result to standard output.
     *
     * @param args unused
     * @throws Exception declared for compatibility; failures of the sample calls are
     *                   caught and printed rather than propagated
     */
    public static void main(String[] args) throws Exception {
        // The native library is looked up under <working dir>\source\NLPIR.
        CLibrary instance = (CLibrary) Native.loadLibrary(
                System.getProperty("user.dir") + "\\source\\NLPIR", CLibrary.class);

        // A return value of 0 means initialization failed; report the library's own message.
        if (0 == instance.NLPIR_Init("", 1, "0")) {
            System.err.println("初始化失败!\n" + instance.NLPIR_GetLastErrorMsg());
            return;
        }

        String sInput = "哎!那个金刚圈尺寸太差,前重后轻,左宽右窄,他戴上去很不舒服,"
                + "整晚失眠会连累我嘛,他虽然是只猴子,但你也不能这样对他啊,官府知道会说我虐待动物的,"
                + "说起那个金刚圈,啊~去年我在陈家村认识了一个铁匠,他手工精美,价钱又公道,童叟无欺,"
                + "干脆我介绍你再定做一个吧!";

        try {
            String resultString = instance.NLPIR_ParagraphProcess(sInput, 1);
            System.out.println("分词结果为:\n " + resultString);

            instance.NLPIR_AddUserWord("金刚圈");
            instance.NLPIR_AddUserWord("左宽右窄");
            resultString = instance.NLPIR_ParagraphProcess(sInput, 1);
            System.out.println("增加用户词典后分词结果为:\n" + resultString);

            instance.NLPIR_DelUsrWord("左宽右窄");
            resultString = instance.NLPIR_ParagraphProcess(sInput, 1);
            System.out.println("删除用户词典后分词结果为:\n" + resultString);

            instance.NLPIR_ImportUserDict(System.getProperty("user.dir") + "\\source\\userdic.txt");
            resultString = instance.NLPIR_ParagraphProcess(sInput, 1);
            System.out.println("导入用户词典文件后分词结果为:\n" + resultString);

            resultString = instance.NLPIR_GetKeyWords(sInput, 10, false);
            System.out.println("从段落中提取的关键词:\n" + resultString);

            resultString = instance.NLPIR_GetNewWords(sInput, 10, false);
            System.out.println("新词提取结果为:\n" + resultString);

            double elapsed = instance.NLPIR_FileProcess("d:\\1.txt", "d:\\2.txt", 1);
            System.out.println("对文件内容进行分词的运行速度为: ");
            System.out.println(describeElapsed(elapsed));

            resultString = instance.NLPIR_GetFileKeyWords("H:\\Java项目\\WeiboCrawler\\所截取的微博数据.txt", 10, false);
            System.out.println("从文件中提取关键词的结果为:\n" + resultString);
        } catch (Exception e) {
            System.out.println("错误信息:");
            e.printStackTrace();
        } finally {
            // Release the native library even when one of the sample calls throws.
            instance.NLPIR_Exit();
        }
    }

    /**
     * Formats the value returned by {@code NLPIR_FileProcess} for display.
     *
     * @param elapsed value returned by the native call; it is divided by 1000 and shown
     *                as seconds (presumably milliseconds -- confirm against the NLPIR docs)
     * @return the value in seconds rounded half-up to two decimals, or "无结果" when the
     *         value is NaN or infinite (neither can be converted to a {@code BigDecimal})
     */
    private static String describeElapsed(double elapsed) {
        if (Double.isNaN(elapsed) || Double.isInfinite(elapsed)) {
            return "无结果";
        }
        BigDecimal seconds = BigDecimal.valueOf(elapsed)
                .divide(BigDecimal.valueOf(1000), 2, java.math.RoundingMode.HALF_UP);
        return seconds + "秒";
    }
}
           

一般情况下就可以使用了,

至于之前有人在私信问我词语后面的字母是什么东西,那个是每个词语的词性,比如名词代词之类的,如果想要删除掉词性让结果只出现汉字,就在

// Segments sSrc. Per this article, passing 1 for bPOSTagged appends a part-of-speech tag to each word; passing 0 omits the tags.
public String NLPIR_ParagraphProcess(String sSrc, int bPOSTagged);
           

这个函数当中,调用时把 bPOSTagged 位置上的参数改为0即可,例如 instance.NLPIR_ParagraphProcess(sInput, 0)

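如果想了解Java这边是怎样把接口方法转发给NLPIR.dll的,可以使用Java Decompiler 反编译查看接口所继承的Library代码(注意:这是JNA库自带的com.sun.jna.Library,并不是张博士所写;它只负责把调用转发给原生函数,是否输出词性是由NLPIR.dll内部根据bPOSTagged决定的)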

这边复制了一下,

package com.sun.jna;

import java.lang.reflect.InvocationHandler;
import java.lang.reflect.Method;
import java.lang.reflect.Proxy;
import java.util.HashMap;
import java.util.Map;
import java.util.WeakHashMap;

/**
 * Decompiled copy of the interface that every JNA library binding extends.
 *
 * <p>Besides the option-key constants it hosts {@link Handler}, the
 * {@link InvocationHandler} that forwards calls made on a binding proxy to
 * functions of a {@code NativeLibrary}.
 *
 * <p>Raw collection types are kept as produced by the decompiler so that the
 * public signatures stay unchanged.
 */
public interface Library
{
  // Key names for the options map. Handler itself reads only the
  // calling-convention, classloader and invocation-mapper entries.
  public static final String OPTION_TYPE_MAPPER = "type-mapper";
  public static final String OPTION_FUNCTION_MAPPER = "function-mapper";
  public static final String OPTION_INVOCATION_MAPPER = "invocation-mapper";
  public static final String OPTION_STRUCTURE_ALIGNMENT = "structure-alignment";
  public static final String OPTION_STRING_ENCODING = "string-encoding";
  public static final String OPTION_ALLOW_OBJECTS = "allow-objects";
  public static final String OPTION_CALLING_CONVENTION = "calling-convention";
  public static final String OPTION_OPEN_FLAGS = "open-flags";
  public static final String OPTION_CLASSLOADER = "classloader";

  /**
   * Invocation handler behind a library proxy: resolves each interface method
   * to a native function (or a custom handler) once, caches the result, and
   * dispatches subsequent calls through the cached entry.
   */
  public static class Handler
    implements InvocationHandler
  {
    // java.lang.Object methods that are answered locally instead of natively.
    static final Method OBJECT_TOSTRING;
    static final Method OBJECT_HASHCODE;
    static final Method OBJECT_EQUALS;
    private final NativeLibrary nativeLibrary;
    private final Class interfaceClass;
    private final Map options;
    private final InvocationMapper invocationMapper;
    // Per-method dispatch cache; every access is guarded by synchronized (functions).
    private final Map functions = new WeakHashMap();

    /**
     * Creates a handler bound to the named native library.
     *
     * @param libname        library name; may be null, but must not be blank
     * @param interfaceClass the binding interface the proxy implements
     * @param options        option map; it is copied, and defaults are filled in
     *                       for the calling convention and class loader
     * @throws IllegalArgumentException if {@code libname} is non-null and blank
     */
    public Handler(String libname, Class interfaceClass, Map options) {
      if ((libname != null) && ("".equals(libname.trim()))) {
        throw new IllegalArgumentException("Invalid library name \"" + libname + "\"");
      }

      this.interfaceClass = interfaceClass;
      // Work on a private copy so the caller's map is never modified.
      options = new HashMap(options);
      // Default convention: 1 if the interface is an AltCallingConvention, otherwise 0.
      int callingConvention = AltCallingConvention.class.isAssignableFrom(interfaceClass) ? 1 : 0;

      if (options.get(OPTION_CALLING_CONVENTION) == null) {
        options.put(OPTION_CALLING_CONVENTION, Integer.valueOf(callingConvention));
      }

      if (options.get(OPTION_CLASSLOADER) == null) {
        options.put(OPTION_CLASSLOADER, interfaceClass.getClassLoader());
      }
      this.options = options;
      this.nativeLibrary = NativeLibrary.getInstance(libname, options);
      this.invocationMapper = ((InvocationMapper)options.get(OPTION_INVOCATION_MAPPER));
    }

    /** Returns the native library this handler dispatches to. */
    public NativeLibrary getNativeLibrary() {
      return this.nativeLibrary;
    }

    /** Returns the name reported by the underlying native library. */
    public String getLibraryName() {
      return this.nativeLibrary.getName();
    }

    /** Returns the binding interface this handler was created for. */
    public Class getInterfaceClass() {
      return this.interfaceClass;
    }

    /**
     * Handles a call made on the proxy.
     *
     * <p>{@code toString}, {@code hashCode} and {@code equals} are answered
     * locally. Any other method is resolved on first use and cached, then
     * dispatched either to a handler supplied by the invocation mapper or to
     * the native function named after the method.
     *
     * @param proxy  the proxy instance the call was made on
     * @param method the interface method being invoked
     * @param inArgs the call arguments
     * @return the result of the local answer, the custom handler or the native call
     * @throws Throwable whatever the delegated handler or function throws
     */
    public Object invoke(Object proxy, Method method, Object[] inArgs)
      throws Throwable
    {
      if (OBJECT_TOSTRING.equals(method)) {
        return "Proxy interface to " + this.nativeLibrary;
      }
      if (OBJECT_HASHCODE.equals(method)) {
        return Integer.valueOf(hashCode());
      }
      if (OBJECT_EQUALS.equals(method)) {
        Object o = inArgs[0];
        // Two proxies are equal only when they share this very handler instance.
        if ((o != null) && (Proxy.isProxyClass(o.getClass()))) {
          return Function.valueOf(Proxy.getInvocationHandler(o) == this);
        }
        return Boolean.FALSE;
      }

      FunctionInfo f = null;
      synchronized (this.functions) {
        f = (FunctionInfo)this.functions.get(method);
        if (f == null) {
          // The decompiler emitted "new FunctionInfo(null)" (synthetic accessor
          // constructor); the class only declares the default constructor.
          f = new FunctionInfo();
          f.isVarArgs = Function.isVarArgs(method);
          if (this.invocationMapper != null) {
            f.handler = this.invocationMapper.getInvocationHandler(this.nativeLibrary, method);
          }
          if (f.handler == null)
          {
            // No custom handler: bind to the native function with the method's name.
            f.function = this.nativeLibrary.getFunction(method.getName(), method);
            f.options = new HashMap(this.options);
            f.options.put("invoking-method", method);
          }
          this.functions.put(method, f);
        }
      }
      if (f.isVarArgs) {
        inArgs = Function.concatenateVarArgs(inArgs);
      }
      if (f.handler != null) {
        return f.handler.invoke(proxy, method, inArgs);
      }
      return f.function.invoke(method.getReturnType(), inArgs, f.options);
    }

    static
    {
      try
      {
        OBJECT_TOSTRING = Object.class.getMethod("toString", new Class[0]);
        OBJECT_HASHCODE = Object.class.getMethod("hashCode", new Class[0]);
        OBJECT_EQUALS = Object.class.getMethod("equals", new Class[] { Object.class });
      }
      catch (Exception e) {
        // Keep the underlying exception as the cause so the failure can be diagnosed.
        throw new Error("Error retrieving Object.toString() method", e);
      }
    }

    /** Cached dispatch data for one interface method. */
    private static class FunctionInfo
    {
      // Custom handler from the invocation mapper; null when the native function is used.
      InvocationHandler handler;
      // Native function bound to the method; set only when handler is null.
      Function function;
      boolean isVarArgs;
      // Copy of the handler options plus the "invoking-method" entry; set only when handler is null.
      Map options;
    }
  }
}
           

- - 其实我想修改他的代码的,毕竟跟我目前所做的项目还是有点冲突所在,但是不知道怎么改- - 

继续阅读