当前位置:Gxlcms > 数据库问题 > leveldb学习:Cache

leveldb学习:Cache

时间:2021-07-01 10:21:17 帮助过:7人阅读

Cache* NewLRUCache(size_t capacity);

用来构造cache派生类对象,并返回派生类指针。那么cache的派生类究竟是什么呢?很容易在cache.cc中发现了ShardedLRUCache类,继承自cache,这是leveldb缓冲区算法的默认实现。

  1. <code class=" hljs cs">ShardedLRUCache成员变量
  2. <span class="hljs-keyword">static</span> <span class="hljs-keyword">const</span> <span class="hljs-keyword">int</span> kNumShardBits = <span class="hljs-number">4</span>;
  3. <span class="hljs-keyword">static</span> <span class="hljs-keyword">const</span> <span class="hljs-keyword">int</span> kNumShards = <span class="hljs-number">1</span> << kNumShardBits;
  4. <span class="hljs-keyword">private</span>:
  5. LRUCache shard_[kNumShards]; <span class="hljs-comment">//暂时不明</span>
  6. port::Mutex id_mutex_; <span class="hljs-comment">//互斥锁</span>
  7. uint64_t last_id_; <span class="hljs-comment">//不明</span></code>

LRUCache的实现我们暂且不知道,但看起来ShardedLRUCache应该是一个封装类,真正的cache是LRUCache,而且是16个。先看ShardedLRUCache函数:

  1. <code class=" hljs cpp"> <span class="hljs-comment">//返回key的hash值</span>
  2. <span class="hljs-keyword">static</span> <span class="hljs-keyword">inline</span> uint32_t HashSlice(<span class="hljs-keyword">const</span> Slice& s) {
  3. <span class="hljs-keyword">return</span> Hash(s.data(), s.size(), <span class="hljs-number">0</span>);
  4. }
  5. <span class="hljs-comment">//取hash的前四位</span>
  6. <span class="hljs-keyword">static</span> uint32_t Shard(uint32_t hash) {
  7. <span class="hljs-keyword">return</span> hash >> (<span class="hljs-number">32</span> - kNumShardBits);
  8. }
  9. <span class="hljs-keyword">public</span>:
  10. <span class="hljs-comment">//构造ShardedLRUCache对象,初始化LRUCache成员变量</span>
  11. <span class="hljs-comment">//设置容量,并且容量和16对齐</span>
  12. <span class="hljs-keyword">explicit</span> ShardedLRUCache(size_t capacity)
  13. : last_id_(<span class="hljs-number">0</span>) {
  14. <span class="hljs-keyword">const</span> size_t per_shard = (capacity + (kNumShards - <span class="hljs-number">1</span>)) / kNumShards;
  15. <span class="hljs-keyword">for</span> (<span class="hljs-keyword">int</span> s = <span class="hljs-number">0</span>; s < kNumShards; s++) {
  16. shard_[s].SetCapacity(per_shard);
  17. }
  18. }
  19. <span class="hljs-keyword">virtual</span> ~ShardedLRUCache() { }
  20. <span class="hljs-comment">//插入操作</span>
  21. <span class="hljs-comment">//先取key的hash值 HashSlice(key),hash值得前四位(Shard(hash))决定key所在的LRUCache数组</span>
  22. <span class="hljs-comment">//将key插入shard_[Shard(hash)]</span>
  23. <span class="hljs-keyword">virtual</span> Handle* Insert(<span class="hljs-keyword">const</span> Slice& key, <span class="hljs-keyword">void</span>* value, size_t charge,
  24. <span class="hljs-keyword">void</span> (*deleter)(<span class="hljs-keyword">const</span> Slice& key, <span class="hljs-keyword">void</span>* value)) {
  25. <span class="hljs-keyword">const</span> uint32_t hash = HashSlice(key);
  26. <span class="hljs-keyword">return</span> shard_[Shard(hash)].Insert(key, hash, value, charge, deleter);
  27. }
  28. <span class="hljs-comment">//查找操作,和插入过程操作逻辑一样</span>
  29. <span class="hljs-keyword">virtual</span> Handle* Lookup(<span class="hljs-keyword">const</span> Slice& key) {
  30. <span class="hljs-keyword">const</span> uint32_t hash = HashSlice(key);
  31. <span class="hljs-keyword">return</span> shard_[Shard(hash)].Lookup(key, hash);
  32. }
  33. <span class="hljs-keyword">virtual</span> <span class="hljs-keyword">void</span> Release(Handle* handle) {
  34. LRUHandle* h = <span class="hljs-keyword">reinterpret_cast</span><LRUHandle*>(handle);
  35. shard_[Shard(h->hash)].Release(handle);
  36. }
  37. <span class="hljs-keyword">virtual</span> <span class="hljs-keyword">void</span> Erase(<span class="hljs-keyword">const</span> Slice& key) {
  38. <span class="hljs-keyword">const</span> uint32_t hash = HashSlice(key);
  39. shard_[Shard(hash)].Erase(key, hash);
  40. }
  41. <span class="hljs-keyword">virtual</span> <span class="hljs-keyword">void</span>* Value(Handle* handle) {
  42. <span class="hljs-keyword">return</span> <span class="hljs-keyword">reinterpret_cast</span><LRUHandle*>(handle)->value;
  43. }
  44. <span class="hljs-keyword">virtual</span> uint64_t NewId() {
  45. MutexLock l(&id_mutex_);
  46. <span class="hljs-keyword">return</span> ++(last_id_);
  47. }</code>

从ShardedLRUCache的成员函数,我们还是获得了很多关于ShardedLRUCache的信息。ShardedLRUCache是一个封装类,真正的cache是LRUCache数组,ShardedLRUCache完成的操作就是计算key的hash值,并以hash值得高四位决定key所在的LRUCache数组,然后调用LRUCache的函数完成cache操作。

  1. <code class=" hljs lasso">LRUCache:
  2. <span class="hljs-comment">// Initialized before use.</span>
  3. size_t capacity_; <span class="hljs-comment">//容量</span>
  4. <span class="hljs-comment">// mutex_ protects the following state.</span>
  5. port<span class="hljs-tag">::Mutex</span> mutex_; <span class="hljs-comment">//互斥锁</span>
  6. size_t usage_; <span class="hljs-comment">//使用量</span>
  7. <span class="hljs-comment">// Dummy head of LRU list.</span>
  8. <span class="hljs-comment">// lru.prev is newest entry, lru.next is oldest entry.</span>
  9. LRUHandle lru_; <span class="hljs-comment">//不明</span>
  10. HandleTable table_; <span class="hljs-comment">//不明</span>
  11. LRUCache成员变量如上,再看看LRUCache的函数
  12. <span class="hljs-comment">//删除e节点</span>
  13. <span class="hljs-literal">void</span> LRUCache<span class="hljs-tag">::LRU_Remove</span>(LRUHandle<span class="hljs-subst">*</span> e) {
  14. e<span class="hljs-subst">-></span>next<span class="hljs-subst">-></span>prev <span class="hljs-subst">=</span> e<span class="hljs-subst">-></span>prev;
  15. e<span class="hljs-subst">-></span>prev<span class="hljs-subst">-></span>next <span class="hljs-subst">=</span> e<span class="hljs-subst">-></span>next;
  16. }
  17. <span class="hljs-comment">//附加e节点</span>
  18. <span class="hljs-literal">void</span> LRUCache<span class="hljs-tag">::LRU_Append</span>(LRUHandle<span class="hljs-subst">*</span> e) {
  19. <span class="hljs-comment">// Make "e" newest entry by inserting just before lru_</span>
  20. <span class="hljs-comment">//添加的新节点在lru_之前</span>
  21. e<span class="hljs-subst">-></span>next <span class="hljs-subst">=</span> <span class="hljs-subst">&</span>lru_;
  22. e<span class="hljs-subst">-></span>prev <span class="hljs-subst">=</span> lru_<span class="hljs-built_in">.</span>prev;
  23. e<span class="hljs-subst">-></span>prev<span class="hljs-subst">-></span>next <span class="hljs-subst">=</span> e;
  24. e<span class="hljs-subst">-></span>next<span class="hljs-subst">-></span>prev <span class="hljs-subst">=</span> e;
  25. }
  26. <span class="hljs-comment">//查找操作</span>
  27. <span class="hljs-comment">//table_保存着key所在handle的指针信息</span>
  28. <span class="hljs-comment">//查找完要把handle提到链表的最前面,是一种为了高效查找的策略</span>
  29. <span class="hljs-keyword">Cache</span><span class="hljs-tag">::Handle</span><span class="hljs-subst">*</span> LRUCache<span class="hljs-tag">::Lookup</span>(const Slice<span class="hljs-subst">&</span> key, uint32_t hash) {
  30. MutexLock l(<span class="hljs-subst">&</span>mutex_);
  31. LRUHandle<span class="hljs-subst">*</span> e <span class="hljs-subst">=</span> table_<span class="hljs-built_in">.</span>Lookup(key, hash);
  32. <span class="hljs-keyword">if</span> (e <span class="hljs-subst">!=</span> <span class="hljs-built_in">NULL</span>) {
  33. e<span class="hljs-subst">-></span>refs<span class="hljs-subst">++</span>;
  34. LRU_Remove(e);
  35. LRU_Append(e);
  36. }
  37. <span class="hljs-keyword">return</span> reinterpret_cast<span class="hljs-subst"><</span><span class="hljs-keyword">Cache</span><span class="hljs-tag">::Handle</span><span class="hljs-subst">*></span>(e);
  38. }</code>

可以确定,LRUCache封装了一个LRUHandle链表的信息,lru_是这个链表的头结点,table_是一个辅助定位链表中各LRUHandle节点的结构。

LRUHandle结构体:
下面我们终于来到了cache的最底层,LRUHandle结构真正包含了所缓冲的数据

  1. <code class=" hljs cpp"><span class="hljs-keyword">struct</span> LRUHandle {
  2. <span class="hljs-comment">//value数据</span>
  3. <span class="hljs-keyword">void</span>* value;
  4. <span class="hljs-comment">//delete函数指针</span>
  5. <span class="hljs-keyword">void</span> (*deleter)(<span class="hljs-keyword">const</span> Slice&, <span class="hljs-keyword">void</span>* value);
  6. <span class="hljs-comment">//下面就是关于LRUHandle链表的实现</span>
  7. <span class="hljs-comment">//可以看明的有key的hash值</span>
  8. <span class="hljs-comment">//key的数据</span>
  9. LRUHandle* next_hash;
  10. LRUHandle* next;
  11. LRUHandle* prev;
  12. size_t charge; <span class="hljs-comment">// TODO(opt): Only allow uint32_t?</span>
  13. size_t key_length;
  14. uint32_t refs;
  15. uint32_t hash; <span class="hljs-comment">// Hash of key(); used for fast sharding and comparisons</span>
  16. <span class="hljs-keyword">char</span> key_data[<span class="hljs-number">1</span>]; <span class="hljs-comment">// Beginning of key</span>
  17. <span class="hljs-comment">//取出所缓冲的数据</span>
  18. Slice key() <span class="hljs-keyword">const</span> {
  19. <span class="hljs-comment">// For cheaper lookups, we allow a temporary Handle object</span>
  20. <span class="hljs-comment">// to store a pointer to a key in "value".</span>
  21. <span class="hljs-keyword">if</span> (next == <span class="hljs-keyword">this</span>) {
  22. <span class="hljs-keyword">return</span> *(<span class="hljs-keyword">reinterpret_cast</span><Slice*>(value));
  23. } <span class="hljs-keyword">else</span> {
  24. <span class="hljs-keyword">return</span> Slice(key_data, key_length);
  25. }
  26. }
  27. };</code>

节点的定义我们看完了,链表的操作我们还是要回到上层LRUCache去体会。

在ShardedLRUCache中我们知道插入一个key是要通过hash值决定key所在的LRUCache数组,之后把key交给数组中相应的LRUCache对象处理,这就调用了
LRUCache::Insert函数

  1. <code class=" hljs cpp">Cache::Handle* LRUCache::Insert(
  2. <span class="hljs-keyword">const</span> Slice& key, uint32_t hash, <span class="hljs-keyword">void</span>* value, size_t charge,
  3. <span class="hljs-keyword">void</span> (*deleter)(<span class="hljs-keyword">const</span> Slice& key, <span class="hljs-keyword">void</span>* value)) {
  4. <span class="hljs-comment">//插入需要上锁</span>
  5. MutexLock l(&mutex_);
  6. <span class="hljs-comment">//构建一个新的LRUHandle节点</span>
  7. LRUHandle* e = <span class="hljs-keyword">reinterpret_cast</span><LRUHandle*>(
  8. <span class="hljs-built_in">malloc</span>(<span class="hljs-keyword">sizeof</span>(LRUHandle)-<span class="hljs-number">1</span> + key.size()));
  9. <span class="hljs-comment">//指定新节点的信息</span>
  10. <span class="hljs-comment">//value值</span>
  11. e->value = value;
  12. <span class="hljs-comment">//key,value的delete函数,可以自定义</span>
  13. e->deleter = deleter;
  14. e->charge = charge;
  15. <span class="hljs-comment">//key的hash、长度等</span>
  16. e->key_length = key.size();
  17. e->hash = hash;
  18. e->refs = <span class="hljs-number">2</span>; <span class="hljs-comment">// One from LRUCache, one for the returned handle</span>
  19. <span class="hljs-built_in">memcpy</span>(e->key_data, key.data(), key.size());
  20. <span class="hljs-comment">//将新节点追加到链表中</span>
  21. LRU_Append(e);
  22. usage_ += charge;
  23. <span class="hljs-comment">//把新链表加入的信息传递给table,在table中登记新节点的信息</span>
  24. LRUHandle* old = table_.Insert(e);
  25. <span class="hljs-keyword">if</span> (old != NULL) {
  26. LRU_Remove(old);
  27. Unref(old);
  28. }
  29. <span class="hljs-comment">//加入新节点后,如果超出LRUCache的设定容量,就删除最旧的节点</span>
  30. <span class="hljs-comment">//新节点都是在头节点前</span>
  31. <span class="hljs-keyword">while</span> (usage_ > capacity_ && lru_.next != &lru_) {
  32. LRUHandle* old = lru_.next;
  33. LRU_Remove(old);
  34. table_.Remove(old->key(), old->hash);
  35. Unref(old);
  36. }
  37. <span class="hljs-keyword">return</span> <span class="hljs-keyword">reinterpret_cast</span><Cache::Handle*>(e);
  38. }</code>

注:Cache::Handle是一个空结构,没有任何成员,也并不会实例化,因为毫无意义。它的存在只是为了做一个指针,是LRUCache中很多函数的返回类型。
现在整个cache的结构和实现就基本已经讲完了,只剩一个存有LRUHandle节点信息、辅助查找的HandleTable没有介绍,但这并不妨碍我们画出cache的结构图,如下:(图片来源自网络)
技术分享

Cache类是一个抽象类,调用全局函数NewLRUCache返回一个SharedLRUCache派生类对象,SharedLRUCache包含一个LRUCache数组,这么做是因为levelDB是多线程的,每个线程访问缓冲区的时候都会将缓冲区锁住,为了多线程访问尽可能快速,减少锁开销,ShardedLRUCache内部有16个LRUCache,这样就可以同时访问这十六个cache区。而LRUCache本身维护了一个双向链表,链表的节点为LRUHandle,LRUHandle放有key-value的数据。

HandleTable:

  1. <code class=" hljs cs"> <span class="hljs-keyword">private</span>:
  2. // The table consists of an array of buckets <span class="hljs-keyword">where</span> each bucket <span class="hljs-keyword">is</span>
  3. // a linked list of cache entries that hash <span class="hljs-keyword">into</span> the bucket.
  4. uint32_t length_; <span class="hljs-comment">//头节点个数</span>
  5. uint32_t elems_; <span class="hljs-comment">//hash表中元素个数</span>
  6. LRUHandle** list_; <span class="hljs-comment">//指针链表</span></code>

HandleTable的实现就是数组实现的hash表,数组中放置LRUHandle指针,根据key的hash值与hash表大小的余数定位key在hash表中的位置,而leveldb使用链表的方式解决竞争问题。每组链表的节点就是LRUCache里的节点,只不过在这里,链表的后向指针是每个LRUHandle对象的next_hash成员,即leveldb是对写进LRUCache的节点做了一次重新排列。这样的策略是相当聪明的,只用了一个指针数组何在节点中添加一个后向指针成员就完成了帮助快速查找的hash表。

  1. <code class=" hljs coffeescript">LRUHandle** FindPointer(<span class="hljs-reserved">const</span> Slice& key, uint32_t hash) {
  2. <span class="hljs-regexp">//</span>hash值得求余
  3. LRUHandle** ptr = &list_[hash & (length_ - <span class="hljs-number">1</span>)];
  4. <span class="hljs-regexp">//</span>利用next_hash指针便利这个链表,对比节点的hash值、key
  5. <span class="hljs-keyword">while</span> (*ptr != NULL &&
  6. <span class="hljs-function"><span class="hljs-params">((*ptr)->hash != hash || key != (*ptr)->key())</span>) {
  7. <span class="hljs-title">ptr</span> = &<span class="hljs-params">(*ptr)</span>-></span>next_hash;
  8. }
  9. <span class="hljs-keyword">return</span> ptr;
  10. }</code>

这是一个利用handletable查找key的函数。
插入操作:

  1. <code class=" hljs lasso">LRUHandle<span class="hljs-subst">*</span> Insert(LRUHandle<span class="hljs-subst">*</span> h) {
  2. <span class="hljs-comment">//插入操作</span>
  3. <span class="hljs-comment">//在handletable中查找key</span>
  4. <span class="hljs-comment">//没有则增加新节点,有则取代老节点</span>
  5. <span class="hljs-comment">//老节点的删除在上层LRUCache::Insert中完成</span>
  6. LRUHandle<span class="hljs-subst">**</span> ptr <span class="hljs-subst">=</span> FindPointer(h<span class="hljs-subst">-></span>key(), h<span class="hljs-subst">-></span>hash);
  7. LRUHandle<span class="hljs-subst">*</span> old <span class="hljs-subst">=</span> <span class="hljs-subst">*</span>ptr;
  8. h<span class="hljs-subst">-></span>next_hash <span class="hljs-subst">=</span> (old <span class="hljs-subst">==</span> <span class="hljs-built_in">NULL</span> <span class="hljs-subst">?</span> <span class="hljs-built_in">NULL</span> : old<span class="hljs-subst">-></span>next_hash);
  9. <span class="hljs-subst">*</span>ptr <span class="hljs-subst">=</span> h;
  10. <span class="hljs-comment">//元素计数elems_更新</span>
  11. <span class="hljs-comment">//元素过多,需要resize hash表,增添新的链表</span>
  12. <span class="hljs-keyword">if</span> (old <span class="hljs-subst">==</span> <span class="hljs-built_in">NULL</span>) {
  13. <span class="hljs-subst">++</span>elems_;
  14. <span class="hljs-keyword">if</span> (elems_ <span class="hljs-subst">></span> length_) {
  15. <span class="hljs-comment">// Since each cache entry is fairly large, we aim for a small</span>
  16. <span class="hljs-comment">// average linked list length (<= 1).</span>
  17. Resize();
  18. }
  19. }
  20. <span class="hljs-keyword">return</span> old;
  21. }</code>

hash表过大时,就要对hash表resize操作:

  1. <code class=" hljs axapta"><span class="hljs-keyword">void</span> Resize() {
  2. <span class="hljs-comment">//重新选定hash表的大小,也就是链表的个数</span>
  3. uint32_t new_length = <span class="hljs-number">4</span>;
  4. <span class="hljs-keyword">while</span> (new_length < elems_) {
  5. new_length *= <span class="hljs-number">2</span>;
  6. }
  7. <span class="hljs-comment">//申请链表头结点指针数组</span>
  8. LRUHandle** new_list = <span class="hljs-keyword">new</span> LRUHandle*[new_length];
  9. memset(new_list, <span class="hljs-number">0</span>, sizeof(new_list[<span class="hljs-number">0</span>]) * new_length);
  10. <span class="hljs-comment">//因为链表数量变了,所以依据hash值定位链表hash & (new_length - 1)结果变了</span>
  11. <span class="hljs-comment">//原先头结点在数组中的位置变了</span>
  12. uint32_t <span class="hljs-keyword">count</span> = <span class="hljs-number">0</span>;
  13. <span class="hljs-keyword">for</span> (uint32_t i = <span class="hljs-number">0</span>; i < length_; i++) {
  14. LRUHandle* h = list_[i];
  15. <span class="hljs-keyword">while</span> (h != NULL) {
  16. LRUHandle* next = h->next_hash;
  17. uint32_t hash = h->hash;
  18. LRUHandle** ptr = &new_list[hash & (new_length - <span class="hljs-number">1</span>)];
  19. h->next_hash = *ptr;
  20. *ptr = h;
  21. h = next;
  22. <span class="hljs-keyword">count</span>++;
  23. }
  24. }
  25. <span class="hljs-comment">//删除原有链表头结点指针数组</span>
  26. <span class="hljs-comment">//更新handletable数据</span>
  27. assert(elems_ == <span class="hljs-keyword">count</span>);
  28. delete[] list_;
  29. list_ = new_list;
  30. length_ = new_length;
  31. }</code>

如果table中的链表数不变,那随着缓存的key越来越多,每个链表的长度就会逐渐增加,会带来查寻效率的底下。新建一个拥有更多元素的hash表很有必要,resize意味着原有的所有节点都要在旧的链表中联系要被打断,建立新的联系。

版权声明:本文为博主原创文章,未经博主允许不得转载。

leveldb学习:Cache

标签:leveldb   cache   

人气教程排行