一、簡介
本文主要講解HashMap的原理和部分源碼分析以及jdk8做的優化。
注: 本文章參考博文有: qazwyc 部落客的 Java8 HashMap源碼解析 , 尊重原創
二、分析
2.1 HashMap的結構
- 是一個連結清單散列的資料結構,即 數組和連結清單的結合體。這樣就兼具了尋址容易和增删友善的優點。結構示意圖如下:
-
JDK8源碼閱讀(七) HashMap TODO一、簡介二、分析
-
- HashMap内部維護了一個 Node<K,V>[] 的數組table, 然後基于鍊位址法來解決hash沖突。鍊位址法将哈希表的每個單元作為連結清單的頭結點,所有哈希位址為 i 的元素構成一個同義詞連結清單entrySet。即發生沖突時就把該關鍵字鍊在以該單元為頭結點的連結清單的尾部。
- jdk8采用了紅黑樹優化。
2.2 HashMap的類結構圖
2.3 HashMap的字段
2.3.0 字段清單圖
2.3.1 DEFAULT_INITIAL_CAPACITY
/**
* HashMap的初始容量為16
*/
static final int DEFAULT_INITIAL_CAPACITY = 1 << 4;
2.3.2 MAXIMUM_CAPACITY
/**
* 最大容量,超過這個容量仍是這個容量 2的30次方
*/
static final int MAXIMUM_CAPACITY = 1 << 30;
2.3.3 DEFAULT_LOAD_FACTOR
/**
* 預設的加載因子
*/
static final float DEFAULT_LOAD_FACTOR = 0.75f;
2.3.4 TREEIFY_THRESHOLD
/**
* jdk8的優化,連結清單存儲轉換為紅黑樹存儲節點的阙值
* 預設當桶内連結清單節點數量大于8的時候,采用紅黑樹存儲。
*/
static final int TREEIFY_THRESHOLD = 8;
2.3.5 UNTREEIFY_THRESHOLD
/**
* jdk8的優化,紅黑樹存儲轉換為連結清單存儲節點的阙值
* 預設當桶内連結清單節點數量小于6的時候,采用連結清單存儲。
*/
static final int UNTREEIFY_THRESHOLD = 6;
2.3.6 MIN_TREEIFY_CAPACITY
/**
* 桶中結構轉化為紅黑樹對應的數組的最小大小,
* 如果目前容量小于它,就不會将連結清單轉化為紅黑樹,而是用resize()代替
*/
static final int MIN_TREEIFY_CAPACITY = 64;
2.3.7 table
/**
* 存儲樹節點的table,總是2的次方。某些情況允許長度為0
*/
transient Node<K,V>[] table;
2.3.8 entrySet
/**
* 存放具體元素的集
*/
transient Set<Map.Entry<K,V>> entrySet;
2.3.9 size
/**
* map中key-value對的個數
*/
transient int size;
2.3.10 threshold
/**
* 下次resize擴容的臨界量
* 通過tableSizeFor(cap)方法計算出不小于initialCapacity的最近的2的幂作為初始容量,将其先保
* 存在threshold裡,當put時判斷數組為空會調用resize配置設定記憶體,并重新計算正确的threshold
*/
int threshold;
2.3.11 modCount
/**
* 記錄map結構的修改次數,主要用于疊代器的快速失敗
*/
transient int modCount;
2.3.12 loadFactor
/**
* hash table的加載因子
*/
final float loadFactor;
2.4 HashMap的内部類清單
2.4.0 内部類清單圖
2.4.1 Node<K,V>
- 源碼
-
static class Node<K,V> implements Map.Entry<K,V> { final int hash; final K key; V value; Node<K,V> next; Node(int hash, K key, V value, Node<K,V> next) { this.hash = hash; this.key = key; this.value = value; this.next = next; } public final K getKey() { return key; } public final V getValue() { return value; } public final String toString() { return key + "=" + value; } public final int hashCode() { return Objects.hashCode(key) ^ Objects.hashCode(value); } public final V setValue(V newValue) { V oldValue = value; value = newValue; return oldValue; } public final boolean equals(Object o) { if (o == this) return true; if (o instanceof Map.Entry) { Map.Entry<?,?> e = (Map.Entry<?,?>)o; if (Objects.equals(key, e.getKey()) && Objects.equals(value, e.getValue())) return true; } return false; } }
-
- 詳解TODO
2.5 HashMap的方法清單
2.5.0 方法清單圖
2.5.1 構造函數
- 源碼
-
public HashMap(int initialCapacity, float loadFactor) { if (initialCapacity < 0) throw new IllegalArgumentException("Illegal initial capacity: " + initialCapacity); if (initialCapacity > MAXIMUM_CAPACITY) initialCapacity = MAXIMUM_CAPACITY; if (loadFactor <= 0 || Float.isNaN(loadFactor)) throw new IllegalArgumentException("Illegal load factor: " + loadFactor); this.loadFactor = loadFactor; // 通過tableSizeFor(cap)計算出不小于initialCapacity的最近的2的幂作為初始容量,将其先儲存在threshold裡,當put時判斷數組為空會調用resize配置設定記憶體,并重新計算正确的threshold this.threshold = tableSizeFor(initialCapacity); } public HashMap(int initialCapacity) { this(initialCapacity, DEFAULT_LOAD_FACTOR); } public HashMap() { this.loadFactor = DEFAULT_LOAD_FACTOR; } //HashMap(Map<? extends K>)型構造函數 public HashMap(Map<? extends K, ? extends V> m) { this.loadFactor = DEFAULT_LOAD_FACTOR; // 将m中的所有元素添加至HashMap中 putMapEntries(m, false); }
-
2.5.2 tableSizeFor(int cap)
- 源碼
-
/** * 擷取最近的不小于輸入容量參數的2的整數次幂。比如容量是10,則傳回16 */ static final int tableSizeFor(int cap) { int n = cap - 1; n |= n >>> 1; n |= n >>> 2; n |= n >>> 4; n |= n >>> 8; n |= n >>> 16; return (n < 0) ? 1 : (n >= MAXIMUM_CAPACITY) ? MAXIMUM_CAPACITY : n + 1; }
-
- 計算原理如下:
-
JDK8源碼閱讀(七) HashMap TODO一、簡介二、分析 - 通過tableSizeFor(),保證了HashMap容量始終是2的次方,在通過hash尋找index時就可以用邏輯運算來替代取餘,即hash%n用hash&(n -1)替代。
-
2.5.3 putMapEntries(Map<? extends K, ? extends V> m, boolean evict)
- 源碼
-
final void putMapEntries(Map<? extends K, ? extends V> m, boolean evict) { int s = m.size(); if (s > 0) { if (table == null) { // pre-size // 根據負載因子和現有大小擷取到該集合的最大容量ft float ft = ((float)s / loadFactor) + 1.0F; // 如果ft小于2的32次方則是ft,否則是2的32次方 int t = ((ft < (float)MAXIMUM_CAPACITY) ? (int)ft : MAXIMUM_CAPACITY); // 如果t大于此時記錄的下次要擴容的值時,調用方法計算符合的最近的2的N次方的容量 if (t > threshold) threshold = tableSizeFor(t); } // 如果集合大小大于此時記錄的下次要擴容的值時則擴容 else if (s > threshold) resize(); // 周遊添加 for (Map.Entry<? extends K, ? extends V> e : m.entrySet()) { K key = e.getKey(); V value = e.getValue(); putVal(hash(key), key, value, false, evict); } } }
-
2.5.4 hash(Object key)
- 源碼
-
/** * 使用key的哈希值與該哈希值的高16位做異或得到哈希值 */ static final int hash(Object key) { int h; return (key == null) ? 0 : (h = key.hashCode()) ^ (h >>> 16); }
-
- 計算方式如下圖:
-
JDK8源碼閱讀(七) HashMap TODO一、簡介二、分析
-
-
好處:如果直接使用key的hashcode()作為hash很容易發生碰撞。比如,在n - 1為15(0x1111)時,散列值真正生效的隻是低4位。當新增的鍵的hashcode()是2,18,34這樣恰好以16的倍數為差的等差數列,就産生了大量碰撞。
是以,設計者綜合考慮了速度、作用、品質,把高16bit和低16bit進行了異或。因為現在大多數的hashCode的分布已經很不錯了,就算是發生了較多碰撞也用O(logn)的紅黑樹去優化了。僅僅異或一下,既減少了系統的開銷,也不會造成因為高位沒有參與下标的計算(table長度比較小時),進而引起的碰撞。
2.5.5 resize() TODO
- 源碼
-
final Node<K,V>[] resize() { // 目前table儲存 Node<K,V>[] oldTab = table; // 儲存table大小 int oldCap = (oldTab == null) ? 0 : oldTab.length; // 儲存目前門檻值 int oldThr = threshold; int newCap, newThr = 0; // 之前table大小大于0,即已初始化 if (oldCap > 0) { // 超過最大值就不再擴充了,隻設定門檻值 if (oldCap >= MAXIMUM_CAPACITY) { // 門檻值為最大整形 threshold = Integer.MAX_VALUE; return oldTab; } // 容量翻倍 else if ((newCap = oldCap << 1) < MAXIMUM_CAPACITY && oldCap >= DEFAULT_INITIAL_CAPACITY) // 門檻值翻倍 newThr = oldThr << 1; // double threshold } // 初始容量已存在threshold中 else if (oldThr > 0) // initial capacity was placed in threshold newCap = oldThr; // 使用預設值(使用預設構造函數初始化) else { // zero initial threshold signifies using defaults newCap = DEFAULT_INITIAL_CAPACITY; newThr = (int)(DEFAULT_LOAD_FACTOR * DEFAULT_INITIAL_CAPACITY); } // 計算新門檻值 if (newThr == 0) { float ft = (float)newCap * loadFactor; newThr = (newCap < MAXIMUM_CAPACITY && ft < (float)MAXIMUM_CAPACITY ? (int)ft : Integer.MAX_VALUE); } threshold = newThr; @SuppressWarnings({"rawtypes","unchecked"}) // 初始化table Node<K,V>[] newTab = (Node<K,V>[])new Node[newCap]; table = newTab; // 之前的table已經初始化過 if (oldTab != null) { // 複制元素,重新進行hash for (int j = 0; j < oldCap; ++j) { Node<K,V> e; if ((e = oldTab[j]) != null) { oldTab[j] = null; if (e.next == null) //桶中隻有一個結點 newTab[e.hash & (newCap - 1)] = e; else if (e instanceof TreeNode) //紅黑樹 //根據(e.hash & oldCap)分為兩個,如果哪個數目不大于UNTREEIFY_THRESHOLD,就轉為連結清單 ((TreeNode<K,V>)e).split(this, newTab, j, oldCap); else { // preserve order Node<K,V> loHead = null, loTail = null; Node<K,V> hiHead = null, hiTail = null; Node<K,V> next; // 将同一桶中的元素根據(e.hash & oldCap)是否為0進行分割成兩個不同的連結清單,完成rehash do { next = e.next;//儲存下一個節點 if ((e.hash & oldCap) == 0) { //保留在低部分即原索引 if (loTail == null)//第一個結點讓loTail和loHead都指向它 loHead = e; else loTail.next = e; loTail = e; } else { //hash到高部分即原索引+oldCap if (hiTail == null) hiHead = e; else hiTail.next = e; hiTail = e; } } while ((e = next) != null); if (loTail != null) { loTail.next = null; newTab[j] = loHead; } if (hiTail != null) { hiTail.next = null; newTab[j + oldCap] = hiHead; } } } } } return newTab; }
-
final void split(HashMap<K,V> map, Node<K,V>[] tab, int index, int bit) { TreeNode<K,V> b = this; // Relink into lo and hi lists, preserving order TreeNode<K,V> loHead = null, loTail = null; TreeNode<K,V> hiHead = null, hiTail = null; int lc = 0, hc = 0; for (TreeNode<K,V> e = b, next; e != null; e = next) { next = (TreeNode<K,V>)e.next; e.next = null; if ((e.hash & bit) == 0) { if ((e.prev = loTail) == null) loHead = e; else loTail.next = e; loTail = e; ++lc; } else { if ((e.prev = hiTail) == null) hiHead = e; else hiTail.next = e; hiTail = e; ++hc; } } if (loHead != null) { if (lc <= UNTREEIFY_THRESHOLD) tab[index] = loHead.untreeify(map); else { tab[index] = loHead; if (hiHead != null) // (else is already treeified) loHead.treeify(tab); } } if (hiHead != null) { if (hc <= UNTREEIFY_THRESHOLD) tab[index + bit] = hiHead.untreeify(map); else { tab[index + bit] = hiHead; if (loHead != null) hiHead.treeify(tab); } } }
-
2.5.6 put(K key, V value)
- 源碼
-
public V put(K key, V value) { return putVal(hash(key), key, value, false, true); }
-
final V putVal(int hash, K key, V value, boolean onlyIfAbsent, boolean evict) { Node<K,V>[] tab; Node<K,V> p; int n, i; if ((tab = table) == null || (n = tab.length) == 0) n = (tab = resize()).length; if ((p = tab[i = (n - 1) & hash]) == null) tab[i] = newNode(hash, key, value, null); else { Node<K,V> e; K k; if (p.hash == hash && ((k = p.key) == key || (key != null && key.equals(k)))) e = p; else if (p instanceof TreeNode) e = ((TreeNode<K,V>)p).putTreeVal(this, tab, hash, key, value); else { for (int binCount = 0; ; ++binCount) { if ((e = p.next) == null) { p.next = newNode(hash, key, value, null); if (binCount >= TREEIFY_THRESHOLD - 1) // -1 for 1st treeifyBin(tab, hash); break; } if (e.hash == hash && ((k = e.key) == key || (key != null && key.equals(k)))) break; p = e; } } if (e != null) { // existing mapping for key V oldValue = e.value; if (!onlyIfAbsent || oldValue == null) e.value = value; afterNodeAccess(e); return oldValue; } } ++modCount; if (++size > threshold) resize(); afterNodeInsertion(evict); return null; }
-
/** * Tree version of putVal. */ final TreeNode<K,V> putTreeVal(HashMap<K,V> map, Node<K,V>[] tab, int h, K k, V v) { Class<?> kc = null; boolean searched = false; TreeNode<K,V> root = (parent != null) ? root() : this; for (TreeNode<K,V> p = root;;) { int dir, ph; K pk; if ((ph = p.hash) > h) dir = -1; else if (ph < h) dir = 1; else if ((pk = p.key) == k || (k != null && k.equals(pk))) return p; else if ((kc == null && (kc = comparableClassFor(k)) == null) || (dir = compareComparables(kc, k, pk)) == 0) { if (!searched) { TreeNode<K,V> q, ch; searched = true; if (((ch = p.left) != null && (q = ch.find(h, k, kc)) != null) || ((ch = p.right) != null && (q = ch.find(h, k, kc)) != null)) return q; } dir = tieBreakOrder(k, pk); } TreeNode<K,V> xp = p; if ((p = (dir <= 0) ? p.left : p.right) == null) { Node<K,V> xpn = xp.next; TreeNode<K,V> x = map.newTreeNode(h, k, v, xpn); if (dir <= 0) xp.left = x; else xp.right = x; xp.next = x; x.parent = x.prev = xp; if (xpn != null) ((TreeNode<K,V>)xpn).prev = x; moveRootToFront(tab, balanceInsertion(root, x)); return null; } } }
-
/** * Replaces all linked nodes in bin at index for given hash unless * table is too small, in which case resizes instead. */ final void treeifyBin(Node<K,V>[] tab, int hash) { int n, index; Node<K,V> e; if (tab == null || (n = tab.length) < MIN_TREEIFY_CAPACITY) resize(); else if ((e = tab[index = (n - 1) & hash]) != null) { TreeNode<K,V> hd = null, tl = null; do { TreeNode<K,V> p = replacementTreeNode(e, null); if (tl == null) hd = p; else { p.prev = tl; tl.next = p; } tl = p; } while ((e = e.next) != null); if ((tab[index] = hd) != null) hd.treeify(tab); } }
-
- 流程圖
-
JDK8源碼閱讀(七) HashMap TODO一、簡介二、分析
-
2.5.7 get(Object key) TODO
- 源碼
-
public V get(Object key) { Node<K,V> e; // 如果擷取到節點,則傳回value,否則傳回null return (e = getNode(hash(key), key)) == null ? null : e.value; }
-
final Node<K,V> getNode(int hash, Object key) { Node<K,V>[] tab; Node<K,V> first, e; int n; K k; if ((tab = table) != null && (n = tab.length) > 0 && (first = tab[(n - 1) & hash]) != null) { // 如果頭節點不為空并且頭結點的哈希和key的哈希相等并且key也相等,則頭節點就命中并傳回 if (first.hash == hash && // always check first node ((k = first.key) == key || (key != null && key.equals(k)))) return first; // 如果頭結點沒有命中 if ((e = first.next) != null) { // 如果第一個節點是樹節點 if (first instanceof TreeNode) // 調用樹節點方法擷取,時間複雜度O(logn) 見下 return ((TreeNode<K,V>)first).getTreeNode(hash, key); // 如果是連結清單,則判斷倆個節點的哈希值和key值是否相等, 如果相等,傳回 時間複雜度O(n) do { if (e.hash == hash && ((k = e.key) == key || (key != null && key.equals(k)))) return e; } while ((e = e.next) != null); } } return null; }
-
final TreeNode<K,V> getTreeNode(int h, Object k) { // 先擷取根節點(如果目前節點父節點為空則為父節點,否則根據目前節點周遊得到根節點) // 調用find方法擷取節點 return ((parent != null) ? root() : this).find(h, k, null); }
-
final TreeNode<K,V> root() { // 根據目前節點周遊擷取根節點 for (TreeNode<K,V> r = this, p;;) { if ((p = r.parent) == null) return r; r = p; } }
-
/** * 紅黑樹定位節點 */ final TreeNode<K,V> find(int h, Object k, Class<?> kc) { TreeNode<K,V> p = this; do { int ph, dir; K pk; TreeNode<K,V> pl = p.left, pr = p.right, q; if ((ph = p.hash) > h) p = pl; else if (ph < h) p = pr; else if ((pk = p.key) == k || (k != null && k.equals(pk))) return p; else if (pl == null) p = pr; else if (pr == null) p = pl; else if ((kc != null || (kc = comparableClassFor(k)) != null) && (dir = compareComparables(kc, k, pk)) != 0) p = (dir < 0) ? pl : pr; else if ((q = pr.find(h, k, kc)) != null) return q; else p = pl; } while (p != null); return null; }
-
- 流程圖
2.5.8 containsKey(Object key)
- 源碼
-
public boolean containsKey(Object key) { // 内部還是調用的getNode方法 return getNode(hash(key), key) != null; }
-
2.5.9 containsValue(Object value) TODO
- 源碼
-
public boolean containsValue(Object value) { Node<K,V>[] tab; V v; // 直接去周遊 比較value是否有相等的 TODO 疑問:為啥隻是連結清單結構 沒有樹結構? if ((tab = table) != null && size > 0) { for (int i = 0; i < tab.length; ++i) { for (Node<K,V> e = tab[i]; e != null; e = e.next) { if ((v = e.value) == value || (value != null && value.equals(v))) return true; } } } return false; }
-
2.5.10 remove(Object key) TODO
- 源碼