天天看点

JDK源码阅读之String

String是平常使用最多的类之一。String提供了很多方便的接口来处理字符串,同时支持正则表达式,所以字符串处理能力很强。下面我们简单介绍下String的内部实现,重点关注一些常用方法的实现。String的部分方法涉及到字符编码,这里不介绍字符编码相关的内容。

//String类是final的,即不可继承
public final class String implements java.io.Serializable, Comparable<String>, CharSequence {
    private final char value[];//内部存储结构,即以数组来存储其内容
    private int hash;//标识一个字符串
//实现空字符串
/**
 * Creates an empty string: the backing array is a fresh zero-length
 * {@code char[]}.
 */
public String() {
    this.value = new char[] {};
}
//用已有字符串构造字符串(直接复用原字符串的字符数组和hash值)
/**
 * Creates a string from an existing one.
 *
 * <p>The new instance reuses the {@code value} array reference and the
 * {@code hash} field of {@code original}; no characters are copied.
 *
 * @param original the string whose contents this instance takes over
 */
public String(String original) {
    this.hash = original.hash;
    this.value = original.value;
}
//用字符数组构造字符串
public String(char value[]) {
     this.value = Arrays.copyOf(value, value.length);
}
//如果以字节数组构造,则涉及到字符编码的问题
/**
 * Creates a string from a whole byte array, decoded with the given charset.
 *
 * <p>Delegates to the four-argument constructor with offset {@code 0} and
 * length {@code bytes.length}.
 *
 * @param bytes   the bytes to decode
 * @param charset the charset passed on to the delegate constructor
 */
public String(byte bytes[], Charset charset) {
      this(bytes, 0, bytes.length, charset);
}
//获取长度信息,因为数组提供了长度信息,所以直接获取,否则可以提供一个长度信息
/**
 * Returns the number of {@code char} values in this string, which is simply
 * the length of the backing array.
 *
 * @return the length of the backing {@code char[]}
 */
public int length() {
     return value.length;
}
//判断是否为空
/**
 * Tells whether this string holds no characters.
 *
 * @return {@code true} when the backing array has length zero
 */
public boolean isEmpty() {
    final int count = value.length;
    return count == 0;
}
//按下标读取单个字符;这里只是读取,并不会修改内容(String是不可变的)
/**
 * Returns the character stored at the given position. This is a read-only
 * access; the backing array is not modified.
 *
 * @param index zero-based position of the character
 * @return the {@code char} at {@code index}
 * @throws StringIndexOutOfBoundsException if {@code index} is negative or
 *         not less than the length of this string
 */
public char charAt(int index) {
    final boolean inRange = index >= 0 && index < value.length;
    if (inRange) {
        return value[index];
    }
    throw new StringIndexOutOfBoundsException(index);
}
//比较两字符串
/**
 * Compares this string with another object for content equality.
 *
 * @param anObject the object to compare against
 * @return {@code true} if {@code anObject} is this very instance, or is a
 *         {@code String} with the same length and the same characters in
 *         the same order; {@code false} otherwise (including {@code null})
 */
public boolean equals(Object anObject) {
    // Same reference: equal without looking at the contents.
    if (anObject == this) {
        return true;
    }
    if (!(anObject instanceof String)) {
        return false;
    }
    final char[] mine = value;
    final char[] theirs = ((String) anObject).value;
    // Different lengths can never be equal; skip the scan.
    if (mine.length != theirs.length) {
        return false;
    }
    for (int idx = 0; idx < mine.length; idx++) {
        if (mine[idx] != theirs[idx]) {
            return false;
        }
    }
    return true;
}
//比较两字符串的大小,比较过程和equals类似
/**
 * Compares two strings character by character.
 *
 * @param anotherString the string to compare with
 * @return the difference of the first pair of differing characters; if one
 *         string is a prefix of the other (or they are equal), the
 *         difference of the two lengths
 */
public int compareTo(String anotherString) {
    final char[] mine = value;
    final char[] theirs = anotherString.value;
    final int shared = Math.min(mine.length, theirs.length);
    for (int idx = 0; idx < shared; idx++) {
        final char left = mine[idx];
        final char right = theirs[idx];
        if (left != right) {
            return left - right;
        }
    }
    // No difference in the common prefix: the shorter string sorts first.
    return mine.length - theirs.length;
}
//忽略大小写的一种判断方法
/**
 * Compares two strings while ignoring case differences.
 *
 * <p>Each pair of characters is compared as-is, then after upper-casing
 * both, then after lower-casing the upper-cased values; only a pair that
 * still differs after all three steps decides the result.
 *
 * @param s1 the first string
 * @param s2 the second string
 * @return the difference of the first pair of characters that differ even
 *         after case folding, or the difference of the lengths if no such
 *         pair exists in the common prefix
 */
public int compare(String s1, String s2) {
    final int leftLen = s1.length();
    final int rightLen = s2.length();
    final int shared = Math.min(leftLen, rightLen);
    int pos = 0;
    while (pos < shared) {
        char left = s1.charAt(pos);
        char right = s2.charAt(pos);
        pos++;
        if (left == right) {
            continue;
        }
        left = Character.toUpperCase(left);
        right = Character.toUpperCase(right);
        if (left == right) {
            continue;
        }
        // Second fold: lower-case the already upper-cased characters.
        left = Character.toLowerCase(left);
        right = Character.toLowerCase(right);
        if (left != right) {
            return left - right;
        }
    }
    return leftLen - rightLen;
}
//比较两个字符串中指定区域是否相等(分别从toffset和ooffset开始,各取len个字符)
/**
 * Tests whether a region of this string equals a region of another string.
 *
 * @param toffset start of the region in this string
 * @param other   the string providing the second region
 * @param ooffset start of the region in {@code other}
 * @param len     number of characters to compare
 * @return {@code false} if either offset is negative or either region would
 *         run past the end of its string, or if any compared pair of
 *         characters differs; {@code true} otherwise
 */
public boolean regionMatches(int toffset, String other, int ooffset,
        int len) {
    final char[] self = value;
    final char[] that = other.value;
    // The subtraction is done in long so that it cannot overflow int.
    final boolean inBounds = ooffset >= 0
            && toffset >= 0
            && toffset <= (long) self.length - len
            && ooffset <= (long) that.length - len;
    if (!inBounds) {
        return false;
    }
    for (int i = 0; i < len; i++) {
        if (self[toffset + i] != that[ooffset + i]) {
            return false;
        }
    }
    return true;
}
//判断从toffset位置起是否以指定的子串(前缀)开头
/**
 * Tests whether the characters of this string, beginning at
 * {@code toffset}, start with the given prefix.
 *
 * @param prefix  the expected prefix
 * @param toffset position in this string where the comparison begins
 * @return {@code false} if {@code toffset} is negative, if the prefix would
 *         not fit between {@code toffset} and the end of this string, or if
 *         any character differs; {@code true} otherwise
 */
public boolean startsWith(String prefix, int toffset) {
    final char[] self = value;
    final char[] wanted = prefix.value;
    final int count = wanted.length;
    if (toffset < 0 || toffset > self.length - count) {
        return false;
    }
    for (int i = 0; i < count; i++) {
        if (self[toffset + i] != wanted[i]) {
            return false;
        }
    }
    return true;
}
//判断是否以指定后缀结尾:借助startsWith实现,从(自身长度-后缀长度)的位置开始比较
/**
 * Tests whether this string ends with the given suffix.
 *
 * <p>Implemented as a {@code startsWith} check that begins at
 * {@code length - suffix.length}; a suffix longer than this string gives a
 * negative start and is rejected by {@code startsWith}.
 *
 * @param suffix the expected suffix
 * @return {@code true} if the last characters of this string match
 *         {@code suffix}
 */
public boolean endsWith(String suffix) {
    final int start = value.length - suffix.value.length;
    return startsWith(suffix, start);
}
//查找字符
/**
 * Finds the first occurrence of a character at or after a given position.
 *
 * @param ch        the character (code point) to look for
 * @param fromIndex position to start searching from; negative values are
 *                  treated as {@code 0}
 * @return the index of the first match, or {@code -1} if
 *         {@code fromIndex} is not less than the length or no match is
 *         found; code points at or above
 *         {@code Character.MIN_SUPPLEMENTARY_CODE_POINT} are handed to
 *         {@code indexOfSupplementary}
 */
public int indexOf(int ch, int fromIndex) {
    final char[] chars = this.value;
    final int end = chars.length;
    int start = fromIndex;
    if (start < 0) {
        // A negative start is clamped rather than rejected.
        start = 0;
    } else if (start >= end) {
        return -1;
    }
    if (ch >= Character.MIN_SUPPLEMENTARY_CODE_POINT) {
        // Not representable as a single char: use the dedicated helper.
        return indexOfSupplementary(ch, start);
    }
    for (int pos = start; pos < end; pos++) {
        if (chars[pos] == ch) {
            return pos;
        }
    }
    return -1;
}
//去除首尾的空白字符(所有不大于空格' '的字符,而不仅仅是空格)
/**
 * Strips leading and trailing characters whose code is not greater than
 * that of the space character {@code ' '}.
 *
 * @return this same instance if nothing has to be stripped, otherwise the
 *         substring between the first and last remaining characters
 */
public String trim() {
    final char[] chars = value;
    int begin = 0;
    int end = chars.length;
    // Advance past the leading run.
    while (begin < end && chars[begin] <= ' ') {
        begin++;
    }
    // Pull back over the trailing run.
    while (begin < end && chars[end - 1] <= ' ') {
        end--;
    }
    if (begin == 0 && end == chars.length) {
        return this;
    }
    return substring(begin, end);
}
//返回字符数组
/**
 * Returns the characters of this string as a newly allocated array, so the
 * caller cannot reach the internal one.
 *
 * @return a fresh {@code char[]} holding a copy of the contents
 */
public char[] toCharArray() {
    final int count = value.length;
    final char[] copy = new char[count];
    System.arraycopy(value, 0, copy, 0, count);
    return copy;
}
//执行字符串分割操作
/**
 * Splits this string around matches of {@code regex}.
 *
 * <p>A fast path that avoids the regex engine is taken when the separator
 * is effectively one literal, non-surrogate character: either a
 * one-character string that is not one of the characters in
 * {@code ".$|()[{^?*+\\"}, or a backslash followed by a character that is
 * not an ASCII digit or ASCII letter. Everything else is delegated to
 * {@code Pattern.compile(regex).split(this, limit)}.
 *
 * <p>On the fast path: a positive {@code limit} caps the number of
 * returned pieces at {@code limit}, with the last piece holding the rest of
 * the string; a {@code limit} of zero drops trailing empty pieces; a
 * negative {@code limit} does neither.
 *
 * @param regex the separator expression
 * @param limit controls the number of pieces as described above
 * @return the pieces of this string; an array holding only this string if
 *         the fast path finds no separator
 */
public String[] split(String regex, int limit) {
        char ch = 0;
        //fast path: the separator is one literal char, so split by hand without the regex engine
        if (((regex.value.length == 1 &&
             ".$|()[{^?*+\\".indexOf(ch = regex.charAt(0)) == -1) ||
             (regex.length() == 2 &&
              regex.charAt(0) == '\\' &&
              (((ch = regex.charAt(1))-'0')|('9'-ch)) < 0 &&
              ((ch-'a')|('z'-ch)) < 0 &&
              ((ch-'A')|('Z'-ch)) < 0)) &&
            (ch < Character.MIN_HIGH_SURROGATE ||
             ch > Character.MAX_LOW_SURROGATE))
        {
            int off = 0;
            int next = 0;
            boolean limited = limit > 0;
            ArrayList<String> list = new ArrayList<>();//collects the pieces (an array-backed list, not a linked list)
            while ((next = indexOf(ch, off)) != -1) {//locate the next separator
                if (!limited || list.size() < limit - 1) {
                    list.add(substring(off, next));//piece between the previous separator and this one
                    off = next + 1;
                } else {//limit is active and only one slot is left: it takes everything that remains
                    list.add(substring(off, value.length));
                    off = value.length;
                    break;
                }
            }
            //separator never found: return the whole string as the only element
            if (off == 0)
                return new String[]{this};
            //limit not reached yet: add the remaining tail
            if (!limited || list.size() < limit)
                list.add(substring(off, value.length));
            int resultSize = list.size();
            if (limit == 0)
                while (resultSize > 0 && list.get(resultSize - 1).length() == 0)
                    resultSize--;
            String[] result = new String[resultSize];
            return list.subList(0, resultSize).toArray(result);//return the kept pieces as a String array
        }
        //otherwise let the regex engine do the split
        return Pattern.compile(regex).split(this, limit);
}