Untitled
unknown
c_cpp
a year ago
13 kB
3
Indexable
Never
//===----------------------------------------------------------------------===// // // BusTub // // disk_extendible_hash_table.cpp // // Identification: src/container/disk/hash/disk_extendible_hash_table.cpp // // Copyright (c) 2015-2023, Carnegie Mellon University Database Group // //===----------------------------------------------------------------------===// #include <_types/_uint32_t.h> #include <iostream> #include <string> #include <utility> #include <vector> #include "common/config.h" #include "common/exception.h" #include "common/logger.h" #include "common/macros.h" #include "common/rid.h" #include "common/util/hash_util.h" #include "container/disk/hash/disk_extendible_hash_table.h" #include "storage/index/hash_comparator.h" #include "storage/page/extendible_htable_bucket_page.h" #include "storage/page/extendible_htable_directory_page.h" #include "storage/page/extendible_htable_header_page.h" #include "storage/page/page_guard.h" namespace bustub { template <typename K, typename V, typename KC> DiskExtendibleHashTable<K, V, KC>::DiskExtendibleHashTable(const std::string &name, BufferPoolManager *bpm, const KC &cmp, const HashFunction<K> &hash_fn, uint32_t header_max_depth, uint32_t directory_max_depth, uint32_t bucket_max_size) : bpm_(bpm), cmp_(cmp), hash_fn_(std::move(hash_fn)), header_max_depth_(header_max_depth), directory_max_depth_(directory_max_depth), bucket_max_size_(bucket_max_size) { throw NotImplementedException("DiskExtendibleHashTable is not implemented"); } /***************************************************************************** * SEARCH *****************************************************************************/ template <typename K, typename V, typename KC> auto DiskExtendibleHashTable<K, V, KC>::GetValue(const K &key, std::vector<V> *result, Transaction *transaction) const -> bool { /* My thoughts on Search 1. Use header page to find the corresponding directory page 2. Use the directory page to find the corresponding bucket (using the global index) 3. Traverse this possible bucket to get */ uint32_t hash = Hash(key); Page *header_page = bpm_->FetchPage(header_page_id_); auto *header = reinterpret_cast<ExtendibleHTableHeaderPage *>(header_page->GetData()); uint32_t directory_idx = header->HashToDirectoryIndex(hash); page_id_t directory_page_id = header->GetDirectoryPageId(directory_idx); if(directory_page_id == INVALID_PAGE_ID){ return false; } Page *directory_page = bpm_->FetchPage(directory_page_id); auto *directory = reinterpret_cast<ExtendibleHTableDirectoryPage *>(directory_page->GetData()); uint32_t bucket_idx = directory->HashToBucketIndex(hash); page_id_t bucket_page_id = directory->GetBucketPageId(bucket_idx); if(bucket_page_id == INVALID_PAGE_ID){ return false; } Page *bucket_page = bpm_->FetchPage(bucket_page_id); auto *bucket = reinterpret_cast<ExtendibleHTableBucketPage<K, V, KC> *>(bucket_page->GetData()); V value; if(!bucket->Lookup(key, value, this->cmp_)){ return false; } result->push_back(value); return true; } /***************************************************************************** * INSERTION *****************************************************************************/ template <typename K, typename V, typename KC> auto DiskExtendibleHashTable<K, V, KC>::Insert(const K &key, const V &value, Transaction *transaction) -> bool { /* My thoughts on Insert 1. Use header page to find the corresponding directory page 2. Use the directory page to find the corresponding bucket 3.a If the bucket is not full, then directly insert into that bucket 3.b.1 If the bucket is full, and if the bucket's local depth = global depth, then double the directory; update the directory mapping; redistribute key-value pairs in that bucket 3.b.2 If the bucket is full, and the bucket's local depth != global depth, then just create a new bucket and redistribute */ uint32_t hash = Hash(key); Page *header_page = bpm_->FetchPage(header_page_id_); auto *header = reinterpret_cast<ExtendibleHTableHeaderPage *>(header_page->GetData()); uint32_t directory_idx = header->HashToDirectoryIndex(hash); page_id_t directory_page_id = header->GetDirectoryPageId(directory_idx); Page *directory_page; ExtendibleHTableDirectoryPage *directory; if(directory_page_id == INVALID_PAGE_ID){ directory_page = bpm_->NewPage(&directory_page_id); if(directory_page == nullptr){ throw Exception("Create new page error"); } directory = reinterpret_cast<ExtendibleHTableDirectoryPage *>(directory_page->GetData()); directory->Init(); } else { directory_page = bpm_->FetchPage(directory_page_id); directory = reinterpret_cast<ExtendibleHTableDirectoryPage *>(directory_page->GetData()); } // 4 cases: directly insert (stop); split but no double directory; directory grow; throw exception (stop) while(true){ uint32_t bucket_idx = directory->HashToBucketIndex(hash); page_id_t bucket_page_id = directory->GetBucketPageId(bucket_idx); Page *bucket_page; ExtendibleHTableBucketPage<K, V, KC> *bucket; if(bucket_page_id == INVALID_PAGE_ID){ bucket_page = bpm_->NewPage(&bucket_page_id); if(bucket_page == nullptr){ throw Exception("Create new page error"); } bucket = reinterpret_cast<ExtendibleHTableBucketPage<K, V, KC> *>(bucket_page->GetData()); bucket->Init(); } else { bucket_page = bpm_->FetchPage(bucket_page_id); bucket = reinterpret_cast<ExtendibleHTableBucketPage<K, V, KC> *>(bucket_page->GetData()); } if(bucket->Insert(key, value, this->cmp_)){ return true; } V tmp; if(bucket->Lookup(key, tmp, this->cmp_)){ // duplicate keys return false; } // full but no need to double the directory if(directory->GetLocalDepth(bucket_idx) != directory->GetGlobalDepth()){ page_id_t new_bucket_page_id; Page* new_bucket_page = bpm_->NewPage(&new_bucket_page_id); if(new_bucket_page == nullptr){ throw Exception("create new page error"); } ExtendibleHTableBucketPage<K, V, KC> *new_bucket; new_bucket = reinterpret_cast<ExtendibleHTableBucketPage<K, V, KC> *>(new_bucket_page->GetData()); new_bucket->Init(); // ???????????? std::vector<bool> stay; uint32_t n = bucket->Size(); auto local_depth = static_cast<uint32_t>(1) << directory->GetLocalDepth(bucket_idx); for (uint32_t i = 0; i < n; i++) { if((Hash(bucket->KeyAt(i)) & local_depth) == 0){ stay.push_back(true); } else { stay.push_back(false); } } bucket->SplitBucket(new_bucket, stay); // update local depth??? auto original_depth = directory->GetLocalDepth(bucket_idx); directory->SetLocalDepth(bucket_idx, original_depth + 1); auto new_bucket_idx = directory->GetSplitImageIndex(bucket_idx); directory->SetLocalDepth(new_bucket_idx, original_depth + 1); directory->SetBucketPageId(new_bucket_idx, new_bucket_page_id); continue; } if(directory->GetGlobalDepth() == directory->GetMaxDepth()){ throw Exception("Reach global max depth"); } // full and need to double the directory directory->IncrGlobalDepth(); } } template <typename K, typename V, typename KC> auto DiskExtendibleHashTable<K, V, KC>::InsertToNewDirectory(ExtendibleHTableHeaderPage *header, uint32_t directory_idx, uint32_t hash, const K &key, const V &value) -> bool { page_id_t directory_page_id = header->GetDirectoryPageId(directory_idx); Page *directory_page = bpm_->FetchPage(directory_page_id); auto *directory = reinterpret_cast<ExtendibleHTableDirectoryPage *>(directory_page->GetData()); uint32_t bucket_idx = directory->HashToBucketIndex(hash); bool result = this->InsertToNewBucket(directory, bucket_idx, key, value); return result; } template <typename K, typename V, typename KC> auto DiskExtendibleHashTable<K, V, KC>::InsertToNewBucket(ExtendibleHTableDirectoryPage *directory, uint32_t bucket_idx, const K &key, const V &value) -> bool { page_id_t bucket_page_id = directory->GetBucketPageId(bucket_idx); Page *bucket_page = bpm_->FetchPage(bucket_page_id); auto *bucket = reinterpret_cast<ExtendibleHTableBucketPage<K, V, KC> *>(bucket_page->GetData()); if(!bucket->IsFull()){ bool result = bucket->Insert(key, value, cmp_); return result; } // full so need to split the bucket!!!!! auto new_directory_index = directory->GetSplitImageIndex(bucket_idx); std::cout << new_directory_index << std::endl; return true; } template <typename K, typename V, typename KC> void DiskExtendibleHashTable<K, V, KC>::UpdateDirectoryMapping(ExtendibleHTableDirectoryPage *directory, uint32_t new_bucket_idx, page_id_t new_bucket_page_id, uint32_t new_local_depth, uint32_t local_depth_mask) { throw NotImplementedException("DiskExtendibleHashTable is not implemented"); } /***************************************************************************** * REMOVE *****************************************************************************/ template <typename K, typename V, typename KC> auto DiskExtendibleHashTable<K, V, KC>::Remove(const K &key, Transaction *transaction) -> bool { /* My thoughts on Remove 1. Use header page to find the corresponding directory page 2. Use the directory page to find the corresponding bucket (using the global index) 3. If after removal, the bucket will be empty, then we need to merge; we need to merge with buckets that have same local depth recursively ??? */ uint32_t hash = Hash(key); Page *header_page = bpm_->FetchPage(header_page_id_); auto *header = reinterpret_cast<ExtendibleHTableHeaderPage *>(header_page->GetData()); uint32_t directory_idx = header->HashToDirectoryIndex(hash); page_id_t directory_page_id = header->GetDirectoryPageId(directory_idx); if(directory_page_id == INVALID_PAGE_ID){ return false; } Page* directory_page = bpm_->FetchPage(directory_page_id); auto *directory = reinterpret_cast<ExtendibleHTableDirectoryPage *>(directory_page->GetData()); uint32_t bucket_idx = directory->HashToBucketIndex(hash); page_id_t bucket_page_id = directory->GetBucketPageId(bucket_idx); Page *bucket_page; ExtendibleHTableBucketPage<K, V, KC> *bucket; if(bucket_page_id == INVALID_PAGE_ID){ return false; } bucket_page = bpm_->FetchPage(bucket_page_id); bucket = reinterpret_cast<ExtendibleHTableBucketPage<K, V, KC> *>(bucket_page->GetData()); if(!bucket->Remove(key, cmp_)){ return false; } while(true){ if(!bucket->IsEmpty()) { break; } auto merge_bucket_index = directory->GetSplitImageIndex(bucket_idx); if(directory->GetLocalDepth(merge_bucket_index) != directory->GetLocalDepth(bucket_idx)){ break; } page_id_t merge_bucket_page_id = directory->GetBucketPageId(merge_bucket_index); directory->SetBucketPageId(bucket_idx, merge_bucket_page_id); // update directory bucket_page_id = merge_bucket_page_id; bucket_page = bpm_->FetchPage(bucket_page_id); bucket = reinterpret_cast<ExtendibleHTableBucketPage<K, V, KC> *>(bucket_page->GetData()); directory->DecrLocalDepth(merge_bucket_index); directory->DecrLocalDepth(bucket_idx); } // check whether need to shrink the directory while(directory->CanShrink()){ directory->DecrGlobalDepth(); } return true; } template class DiskExtendibleHashTable<int, int, IntComparator>; template class DiskExtendibleHashTable<GenericKey<4>, RID, GenericComparator<4>>; template class DiskExtendibleHashTable<GenericKey<8>, RID, GenericComparator<8>>; template class DiskExtendibleHashTable<GenericKey<16>, RID, GenericComparator<16>>; template class DiskExtendibleHashTable<GenericKey<32>, RID, GenericComparator<32>>; template class DiskExtendibleHashTable<GenericKey<64>, RID, GenericComparator<64>>; } // namespace bustub