// mirror of https://github.com/LSPosed/DexBuilder.git
// synced 2024-11-22 06:06:28 +00:00
/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */
|
|
|
|
#pragma once
|
|
|
|
#include <algorithm>
#include <cassert>
#include <cstdint>
#include <cstdio>
#include <memory>
#include <vector>
|
|
|
|
namespace slicer {
|
|
|
|
// A specialized Key -> T* map (note that, unlike std:: containers, the values
// are always pointers here, and we don't explicitly store the lookup keys)
//
// Implemented as an incrementally resizable hash table: we split the logical
// hash table into two internal fixed-size tables, the "full table" and an
// "insertion table". When the insertion table overflows, we allocate a larger
// hash table to replace it and the "insertion table" becomes the "full table"
// (the old "full table" is rehashed into the new hash table).
//
// Similar to open addressing hash tables, all the buckets are a single,
// contiguous array. But this table is growing and the collisions are still
// handled as chains (using indexes instead of pointers).
//
// The result is faster than std::unordered_map and uses ~25% of
// the memory used by std::unordered_map<const char*, String*>
//
// The Hash template argument is a type which must implement:
//   1. hash function : uint32_t Hash(const Key& key)
//   2. key compare   : bool Compare(const Key& key, T* value)
//   3. key extraction: Key GetKey(T* value)
//   4. copy semantics
//
|
|
template<class Key, class T, class Hash>
class HashTable {
 private:
  // the index type inside the bucket array
  using Index = uint32_t;

  // Bucket count of the very first insertion table (2^7 - 1).
  static constexpr Index kInitialHashBuckets = (1 << 7) - 1;
  // Overflow capacity reserved per hash bucket for collision-chain nodes.
  static constexpr Index kAvgChainLength = 2;
  // Sentinel terminating a collision chain.
  static constexpr Index kInvalidIndex = static_cast<Index>(-1);
  // Growth factor applied to the bucket count on each resize.
  static constexpr double kResizeFactor = 1.6;

  // One slot of the bucket array: the stored value plus the index of the
  // next node in its collision chain (kInvalidIndex = end of chain).
  // Packed to minimize the footprint of the bucket array.
  struct __attribute__((packed)) Bucket {
    T* value = nullptr;
    Index next = kInvalidIndex;
  };

  // A fixed-capacity hash table: hash_buckets_ directly-addressed slots
  // followed by pre-reserved overflow nodes for the collision chains.
  class Partition {
   public:
    Partition(Index size, const Hash& hasher);

    // Returns false when the pre-reserved capacity is exhausted
    // (the owning HashTable reacts by allocating a bigger partition).
    bool Insert(T* value);

    // Returns nullptr if the key is not found. hash_value must be the
    // caller-precomputed hasher_.Hash(key) (lets HashTable hash once
    // and probe both partitions).
    T* Lookup(const Key& key, uint32_t hash_value) const;

    Index HashBuckets() const { return hash_buckets_; }

    // Re-inserts every value stored in src (used when resizing).
    void InsertAll(const Partition& src);

    void PrintStats(const char* name, bool verbose);

   private:
    std::vector<Bucket> buckets_;
    const Index hash_buckets_;
    Hash hasher_;
  };

 public:
  explicit HashTable(const Hash& hasher = Hash()) : hasher_(hasher) {
    // we start with full_table_ == nullptr
    insertion_table_.reset(new Partition(kInitialHashBuckets, hasher_));
  }

  ~HashTable() = default;

  // No move or copy semantics
  HashTable(const HashTable&) = delete;
  HashTable& operator=(const HashTable&) = delete;

  // Insert a new, non-nullptr T* into the hash table
  // (we only store unique values so the new value must
  // not be in the table already)
  void Insert(T* value);

  // Lookup an existing value
  // (returns nullptr if the value is not found)
  T* Lookup(const Key& key) const;

  void PrintStats(const char* name, bool verbose);

 private:
  std::unique_ptr<Partition> full_table_;
  std::unique_ptr<Partition> insertion_table_;
  Hash hasher_;
};
|
|
|
|
template<class Key, class T, class Hash>
|
|
HashTable<Key, T, Hash>::Partition::Partition(Index size, const Hash& hasher)
|
|
: hash_buckets_(size), hasher_(hasher) {
|
|
// allocate space for the hash buckets + avg chain length
|
|
buckets_.reserve(hash_buckets_ * kAvgChainLength);
|
|
buckets_.resize(hash_buckets_);
|
|
}
|
|
|
|
// Similar to the "cellar" version of coalesced hashing,
|
|
// the buckets array is divided into a fixed set of entries
|
|
// addressable by the hash value [0 .. hash_buckets_) and
|
|
// extra buckets for the collision chains [hash_buckets_, buckets_.size())
|
|
// Unlike coalesced hashing, our "cellar" is growing so we don't actually
|
|
// have to coalesce any chains.
|
|
//
|
|
// Returns true if the insertion succeeded, false if the table overflows
|
|
// (we never insert more than the pre-reserved capacity)
|
|
//
|
|
template<class Key, class T, class Hash>
|
|
bool HashTable<Key, T, Hash>::Partition::Insert(T* value) {
|
|
SLICER_CHECK(value != nullptr);
|
|
// overflow?
|
|
if (buckets_.size() + 1 > buckets_.capacity()) {
|
|
return false;
|
|
}
|
|
auto key = hasher_.GetKey(value);
|
|
Index bucket_index = hasher_.Hash(key) % hash_buckets_;
|
|
if (buckets_[bucket_index].value == nullptr) {
|
|
buckets_[bucket_index].value = value;
|
|
} else {
|
|
Bucket new_bucket = {};
|
|
new_bucket.value = value;
|
|
new_bucket.next = buckets_[bucket_index].next;
|
|
buckets_[bucket_index].next = buckets_.size();
|
|
buckets_.push_back(new_bucket);
|
|
}
|
|
return true;
|
|
}
|
|
|
|
template<class Key, class T, class Hash>
|
|
T* HashTable<Key, T, Hash>::Partition::Lookup(const Key& key, uint32_t hash_value) const {
|
|
assert(hash_value == hasher_.Hash(key));
|
|
Index bucket_index = hash_value % hash_buckets_;
|
|
for (Index index = bucket_index; index != kInvalidIndex; index = buckets_[index].next) {
|
|
auto value = buckets_[index].value;
|
|
if (value == nullptr) {
|
|
assert(index < hash_buckets_);
|
|
break;
|
|
} else if (hasher_.Compare(key, value)) {
|
|
return value;
|
|
}
|
|
}
|
|
return nullptr;
|
|
}
|
|
|
|
template<class Key, class T, class Hash>
|
|
void HashTable<Key, T, Hash>::Partition::InsertAll(const Partition& src) {
|
|
for (const auto& bucket : src.buckets_) {
|
|
if (bucket.value != nullptr) {
|
|
SLICER_CHECK(Insert(bucket.value));
|
|
}
|
|
}
|
|
}
|
|
|
|
// Try to insert into the "insertion table". If that overflows,
|
|
// we allocate a new, larger hash table, move "full table" value to it
|
|
// and "insertion table" becomes the new "full table".
|
|
template<class Key, class T, class Hash>
|
|
void HashTable<Key, T, Hash>::Insert(T* value) {
|
|
assert(Lookup(hasher_.GetKey(value)) == nullptr);
|
|
if (!insertion_table_->Insert(value)) {
|
|
std::unique_ptr<Partition> new_hash_table(
|
|
new Partition(insertion_table_->HashBuckets() * kResizeFactor, hasher_));
|
|
if (full_table_) {
|
|
new_hash_table->InsertAll(*full_table_);
|
|
}
|
|
SLICER_CHECK(new_hash_table->Insert(value));
|
|
full_table_ = std::move(insertion_table_);
|
|
insertion_table_ = std::move(new_hash_table);
|
|
}
|
|
}
|
|
|
|
// First look into the "full table" and if the value is
|
|
// not found there look into the "insertion table" next
|
|
template<class Key, class T, class Hash>
|
|
T* HashTable<Key, T, Hash>::Lookup(const Key& key) const {
|
|
auto hash_value = hasher_.Hash(key);
|
|
if (full_table_) {
|
|
auto value = full_table_->Lookup(key, hash_value);
|
|
if (value != nullptr) {
|
|
return value;
|
|
}
|
|
}
|
|
return insertion_table_->Lookup(key, hash_value);
|
|
}
|
|
|
|
// Prints occupancy and collision-chain statistics for this partition.
// With verbose=true, additionally prints one line per hash bucket with a
// '*' per chain link. Diagnostic-only; does not modify the table.
template<class Key, class T, class Hash>
void HashTable<Key, T, Hash>::Partition::PrintStats(const char* name, bool verbose) {
  int max_chain_length = 0;
  int sum_chain_length = 0;
  int used_buckets = 0;
  for (Index i = 0; i < hash_buckets_; ++i) {
    // NOTE(review): %4d with an unsigned Index argument — works for the
    // realistic bucket counts here, but %4u would match the type; confirm.
    if (verbose) printf("%4d : ", i);
    if (buckets_[i].value != nullptr) {
      ++used_buckets;
      // Chain length counts the links (edges) out of the home slot,
      // so a bucket with a single value has chain_length == 0.
      int chain_length = 0;
      for (Index ci = i; buckets_[ci].next != kInvalidIndex; ci = buckets_[ci].next) {
        // Invariant: overflow chain nodes are never empty.
        SLICER_CHECK(buckets_[ci].value != nullptr);
        ++chain_length;
        if (verbose) printf("*");
      }
      max_chain_length = std::max(max_chain_length, chain_length);
      sum_chain_length += chain_length;
    }
    if (verbose) printf("\n");
  }

  // Integer average (truncating); guarded against division by zero.
  int avg_chain_length = used_buckets ? sum_chain_length / used_buckets : 0;

  printf("\nHash table partition (%s):\n", name);
  printf("  hash_buckets : %u\n", hash_buckets_);
  printf("  size/capacity : %zu / %zu\n", buckets_.size(), buckets_.capacity());
  printf("  used_buckets : %d\n", used_buckets);
  printf("  max_chain_length : %d\n", max_chain_length);
  printf("  avg_chain_length : %d\n", avg_chain_length);
}
|
|
|
|
template<class Key, class T, class Hash>
|
|
void HashTable<Key, T, Hash>::PrintStats(const char* name, bool verbose) {
|
|
printf("\nHash table stats (%s)\n", name);
|
|
if (full_table_) {
|
|
full_table_->PrintStats("full_table", verbose);
|
|
}
|
|
insertion_table_->PrintStats("insertion_table", verbose);
|
|
}
|
|
|
|
} // namespace slicer
|