用哈希表封装myunordered_map和myunordered_set

最新推荐文章于 2025-08-11 19:37:52 发布

原创最新推荐文章于 2025-08-11 19:37:52 发布 · 618 阅读

CC 4.0 BY-SA版权

文章标签：

结构上hash_map和hash_set跟map和set的完全类似，复用同一个hashtable实现key和key/value结构，hash_set传给hash_table的是两个key，hash_map传给hash_table的是pair<const key, value>

2. 模拟实现unordered_map和unordered_set
2.1 实现出复用哈希表的框架，并支持insert

• key参数就用K，value参数就用V，哈希表中的数据类型在哈希表层并不知道是key还是pair<const key, value>，因此还是利用泛型的思想，我们使用T。

• 其次，跟map和set相比，unordered_map和unordered_set的模拟实现类结构更复杂一点，但是大框架和思路是完全类似的。因为HashTable实现了泛型，不知道T参数到底是K，还是pair<K, V>，而insert内部进行插入时，既要把K对象转换成整型取模，又要用K对象比较相等；pair的value不参与取模计算，且pair默认支持的是key和value一起比较相等，而我们任何时候都只需要比较K对象。所以我们在unordered_map和unordered_set层分别实现一个MapKeyOfT和SetKeyOfT的仿函数传给HashTable的KeyOfT，然后HashTable中通过KeyOfT仿函数取出T类型对象中的K对象，再通过仿函数Hash把key转为整型以支持取模。

// MyUnorderedSet.h
namespace kk
{
    template<class K, class Hash = HashFunc<K>>
    class unordered_set
    {
        struct SetKeyOfT
        {
            const K& operator()(const K& key)
            {
                return key;
            }
        };
    public:
        bool insert(const K& key)
        {
            return _ht.Insert(key);
        }
    private:
        hash_bucket::HashTable<K, K, SetKeyOfT, Hash> _ht;
    };
}

// MyUnorderedMap.h
namespace kk
{
    template<class K, class V, class Hash = HashFunc<K>>
    class unordered_map
    {
        struct MapKeyOfT
        {
            const K& operator()(const pair<K, V>& kv)
            {
                return kv.first;
            }
        };
    public:
        bool insert(const pair<K, V>& kv)
        {
            return _ht.Insert(kv);
        }
    private:
        hash_bucket::HashTable<K, pair<K, V>, MapKeyOfT, Hash> _ht;
    };
}

// HashTable.h
// Default hash functor: turns the key into size_t with a C-style cast, so it
// only works for key types for which that cast is valid.
template<class K>
struct HashFunc
{
    size_t operator()(const K& key)
    {
        const size_t code = (size_t)key;
        return code;
    }
};

namespace hash_bucket
{
    // One node of a bucket's singly linked chain.
    template<class T>
    struct HashNode
    {
        T _data;            // stored element
        HashNode<T>* _next; // next node in the same bucket, nullptr at the tail

        HashNode(const T& data)
            :_data(data)
            ,_next(nullptr)
        {}
    };

    // Separate-chaining hash table shared by unordered_map and unordered_set.
    //   K      - key type
    //   T      - stored element type (K for a set, pair<K, V> for a map)
    //   KeyOfT - functor that extracts the key from a T
    //   Hash   - functor that maps a key to size_t
    template<class K, class T, class KeyOfT, class Hash>
    class HashTable
    {
        typedef HashNode<T> Node;

        // Returns the first prime of the fixed table that is >= n, or the
        // largest table entry when n exceeds all of them.
        static unsigned long __stl_next_prime(unsigned long n)
        {
            static const int __stl_num_primes = 28;
            static const unsigned long __stl_prime_list[__stl_num_primes] =
            {
                53, 97, 193, 389, 769,
                1543, 3079, 6151, 12289, 24593,
                49157, 98317, 196613, 393241, 786433,
                1572869, 3145739, 6291469, 12582917, 25165843,
                50331653, 100663319, 201326611, 402653189, 805306457,
                1610612741, 3221225473, 4294967291
            };
            const unsigned long* first = __stl_prime_list;
            const unsigned long* last = __stl_prime_list + __stl_num_primes;
            const unsigned long* pos = std::lower_bound(first, last, n);
            return pos == last ? *(last - 1) : *pos;
        }
    public:
        // Starts with the smallest bucket count of the prime table.
        HashTable()
        {
            _tables.resize(__stl_next_prime(0), nullptr);
        }

        // The table owns its nodes through raw pointers; a member-wise copy
        // would make two tables delete the same nodes, so copying is disabled.
        HashTable(const HashTable&) = delete;
        HashTable& operator=(const HashTable&) = delete;

        // Frees every node of every bucket.
        ~HashTable()
        {
            for (size_t i = 0; i < _tables.size(); i++)
            {
                Node* cur = _tables[i];
                while (cur)
                {
                    Node* next = cur->_next;
                    delete cur;
                    cur = next;
                }
                _tables[i] = nullptr;
            }
        }

        // Returns the node whose key equals `key`, or nullptr when there is none.
        Node* Find(const K& key)
        {
            KeyOfT kot;
            Hash hs;
            Node* cur = _tables[hs(key) % _tables.size()];
            while (cur)
            {
                if (kot(cur->_data) == key)
                    return cur;
                cur = cur->_next;
            }
            return nullptr;
        }

        // Inserts `data` unless an element with the same key is already stored.
        // Returns true when the element was inserted, false on a duplicate key.
        bool Insert(const T& data)
        {
            KeyOfT kot;
            if (Find(kot(data)))
                return false;

            Hash hs;
            // Load factor reached 1: move every node into a larger table.
            if (_n == _tables.size())
            {
                // Ask for a prime strictly greater than the current size;
                // passing size() itself would hand back the same prime.
                const size_t newsize = __stl_next_prime(_tables.size() + 1);
                if (newsize > _tables.size())
                {
                    std::vector<Node*> newtables(newsize, nullptr);
                    for (size_t i = 0; i < _tables.size(); i++)
                    {
                        Node* cur = _tables[i];
                        while (cur)
                        {
                            Node* next = cur->_next;
                            // Re-map the existing node to its bucket in the new table.
                            size_t target = hs(kot(cur->_data)) % newtables.size();
                            // Push it at the head of that bucket.
                            cur->_next = newtables[target];
                            newtables[target] = cur;
                            cur = next;
                        }
                        _tables[i] = nullptr;
                    }
                    _tables.swap(newtables);
                }
            }

            // The bucket index must be computed after a possible rehash,
            // otherwise it refers to the old bucket count.
            const size_t hashi = hs(kot(data)) % _tables.size();
            // Push the new node at the head of its bucket.
            Node* newnode = new Node(data);
            newnode->_next = _tables[hashi];
            _tables[hashi] = newnode;
            ++_n;
            return true;
        }
    private:
        std::vector<Node*> _tables; // bucket heads
        size_t _n = 0;              // number of stored elements
    };
}

支持iterator的实现

iterator实现思路分析
• iterator实现的大框架跟map/set的iterator思路是一致的，用一个类型封装结点的指针，再通过重载运算符实现，迭代器像指针一样访问的行为，要注意的是哈希表的迭代器是单向迭代器。
• 这里的难点是operator++的实现。iterator中有一个指向结点的指针，如果当前桶下面还有结点，则结点的指针指向下一个结点即可。如果当前桶走完了，则需要想办法计算找到下一个桶。这里的难点反而是结构设计的问题：iterator中除了有结点的指针，还要有哈希表对象的指针，这样当前桶走完了，要计算下一个桶就相对容易多了——用key值计算出当前桶位置，依次往后找下一个不为空的桶即可。因为是单向迭代器，所以不支持operator--。
• begin()返回第一个非空桶中第一个结点的指针构造的迭代器，end()返回的迭代器可以用空指针表示。
• unordered_set的iterator不支持修改，我们把unordered_set的第二个模板参数改成const K即
可， HashTable<K, const K, SetKeyOfT, Hash> _ht;
• unordered_map的iterator也不支持修改key但是可以修改value，我们把unordered_map的第二个
模板参数pair的第一个参数改成const K即可， HashTable<K, pair<const K, V>, MapKeyOfT, Hash> _ht;
• 另外，由于迭代器需要通过哈希表的指针来访问表的私有成员，因此需要将迭代器设置为哈希表的友元类。哈希表需要typedef迭代器，迭代器又需要使用哈希表指针，因此两个类产生了相互依赖。编译器在查找类的时候只会向上寻找，所以无论哪个类放在前面，它都找不到定义在后面的另一个类。因此我们把迭代器定义在前面，哈希表定义在后面，并在迭代器之前加上哈希表的前置声明来解决这个问题。

• 由于迭代器中使用了哈希表指针，因此迭代器中还需要多传一个模板参数K。

// One node of a bucket's singly linked chain.
template<class T>
struct HashNode
{
	T _data;                      // stored element
	HashNode<T>* _next = nullptr; // next node in the same bucket, nullptr at the tail

	// Copies `data` into the node; the node starts out unlinked.
	HashNode(const T& data)
		: _data(data)
	{}
};

// Forward declaration: the iterator stores a pointer to HashTable, which is
// defined after the iterator.
template<class K, class T, class KeyOfT, class Hash>
class HashTable;

// Forward-only iterator over a HashTable.
//   Ref / Ptr select the mutable (T&, T*) or read-only (const T&, const T*) flavour.
// It keeps the current node plus a pointer to the owning table so that it can
// move on to the next non-empty bucket once the current chain is exhausted.
// A null _node represents the end position.
template<class K,class T,class Ref,class Ptr,class KeyOfT,class Hash>
struct HTIterator
{
	typedef HashNode<T> Node;
	typedef HashTable<K, T, KeyOfT, Hash> HT;
	typedef HTIterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;

	Node* _node;
	const HT* _ht;

	HTIterator(Node* node,const HT* ht)
		:_node(node)
		,_ht(ht)
	{}

	// Pre-increment. Must not be called on the end position (null _node).
	Self& operator++()
	{
		if (_node->_next)
		{
			// Still inside the current bucket's chain.
			_node = _node->_next;
			return *this;
		}

		// Chain exhausted: recompute the current bucket from the key and scan
		// forward for the next non-empty bucket. size_t keeps the index in the
		// same unsigned type as the bucket count.
		KeyOfT kot;
		Hash hash;
		const size_t bucket_count = _ht->_tables.size();
		size_t hashi = hash(kot(_node->_data)) % bucket_count + 1;
		while (hashi < bucket_count && !_ht->_tables[hashi])
		{
			++hashi;
		}
		_node = hashi < bucket_count ? _ht->_tables[hashi] : nullptr;
		return *this;
	}

	// Post-increment: returns the position held before advancing.
	Self operator++(int)
	{
		Self tmp = *this;
		++(*this);
		return tmp;
	}

	// Access to the stored element; const so it also works on const iterators.
	Ref operator*() const
	{
		return _node->_data;
	}
	Ptr operator->() const
	{
		return &_node->_data;
	}

	// Two iterators are equal when they refer to the same node.
	bool operator!= (const Self& s) const
	{
		return _node != s._node;
	}
	bool operator== (const Self& s) const
	{
		return _node == s._node;
	}
};

对于unordered_map，我们还需要重载[]，unordered_map的[]有两种情况，如果key不存在，就插入，并返回插入后value的引用，如果key存在，则插入失败，充当查找功能，返回value的引用。

// Returns a reference to the value mapped to `key`. The key is first passed to
// _ht.Insert together with a value-initialized V; the iterator in the returned
// pair's .first is then used to reach the stored value.
V& operator[](const K& key)
{
	pair<iterator, bool> result = _ht.Insert(make_pair(key, V()));
	iterator pos = result.first;
	// pos-> yields a pointer to the stored pair, so ->second is the mapped value.
	return pos->second;
}

完整代码实现

//Unordered_Set.h
#pragma once
#include"HashTable.h"

namespace hj
{
	template<class K,class Hash = HashFunc<K>>
	class unordered_set
	{
		struct UnorderedSetKeyOfT
		{
			const K& operator()(const K& key)
			{
				return key;
			}
		};
	public:
		typedef typename hash_bucket::HashTable<K, const K, UnorderedSetKeyOfT, Hash>::Iterator iterator;
		typedef typename hash_bucket::HashTable<K, const K, UnorderedSetKeyOfT, Hash>::ConstIterator const_iterator;

		iterator begin()
		{
			return _ht.Begin();
		}
		iterator end()
		{
			return _ht.End();
		}
		const_iterator begin() const
		{
			return _ht.Begin();
		}
		const_iterator end() const
		{
			return _ht.End();
		}
		bool empty() const
		{
			return _ht.Empty();
		}
		size_t size() const
		{
			return _ht.Size();
		}
		pair<iterator, bool> insert(const K& key)
		{
			return _ht.Insert(key);
		}
		iterator Erase(const K& key)
		{
			return _ht.Erase(key);
		}
		iterator Find(const K& key)
		{
			return _ht.Find(key);
		}
	private:
		hash_bucket::HashTable<K, const K, UnorderedSetKeyOfT, Hash> _ht;
	};
}


//Unordered_Map.h
#pragma once
#include"HashTable.h"

namespace hj
{
	template<class K, class V,class Hash = HashFunc<K>>
	class unordered_map
	{
		struct UnorderedMapKeyOfT
		{
			const K& operator()(const pair<const K,V>& kv)
			{
				return kv.first;
			}
		};
	public:
		typedef typename hash_bucket::HashTable<K, pair<const K, V>, UnorderedMapKeyOfT, Hash>::Iterator iterator;
		typedef typename hash_bucket::HashTable<K, pair<const K, V>, UnorderedMapKeyOfT, Hash>::ConstIterator const_iterator;

		iterator begin()
		{
			return _ht.Begin();
		}
		iterator end()
		{
			return _ht.End();
		}
		const_iterator begin() const
		{
			return _ht.Begin();
		}
		const_iterator end() const
		{
			return _ht.End();
		}
		bool empty() const
		{
			return _ht.Empty();
		}
		size_t size() const
		{
			return _ht.Size();
		}
		pair<iterator, bool> insert(const pair<K,V>& kv)
		{
			return _ht.Insert(kv);
		}
		iterator Erase(const K& key)
		{
			return _ht.Erase(key);
		}
		iterator Find(const K& key)
		{
			return _ht.Find(key);
		}
		V& operator[](const K& key)
		{
			pair<iterator, bool> ret = _ht.Insert(make_pair(key, V()));
			//it.first是一个迭代器，使用->解引用得到_data，而map传给T为pair<K,V>类型，隐藏了第二层
			//pair取second的->
			return ret.first->second;
		}
	private:
		hash_bucket::HashTable<K, pair<const K,V>, UnorderedMapKeyOfT, Hash> _ht;
	};
}


//HashTable.h
#pragma once
#include<algorithm>
#include<cstddef>
#include<iostream>
#include<string>
#include<utility>
#include<vector>
using namespace std;
// Default hash functor: turns the key into size_t with a C-style cast, so it
// only works for key types for which that cast is valid.
template<class K>
struct HashFunc
{
	size_t operator()(const K& key)
	{
		const size_t code = (size_t)key;
		return code;
	}
};

// String keys: folds the characters left to right, multiplying the running
// value by 131 before adding each character. An empty string hashes to 0.
template<>
struct HashFunc<std::string>
{
	size_t operator()(const std::string& s)
	{
		size_t val = 0;
		for (size_t i = 0; i < s.size(); ++i)
		{
			val = val * 131 + s[i];
		}
		return val;
	}
};

// Looks `n` up in a fixed ascending table of 28 primes and returns the first
// entry that is not less than n. When n is larger than every entry, the
// largest entry is returned instead.
static inline unsigned long __stl_next_prime(unsigned long n)
{
	// Note: assumes long is at least 32 bits.
	static const int kPrimeCount = 28;
	static const unsigned long kPrimes[kPrimeCount] = {
		53, 97, 193, 389, 769,
		1543, 3079, 6151, 12289, 24593,
		49157, 98317, 196613, 393241, 786433,
		1572869, 3145739, 6291469, 12582917, 25165843,
		50331653, 100663319, 201326611, 402653189, 805306457,
		1610612741, 3221225473, 4294967291
	};
	const unsigned long* const table_end = kPrimes + kPrimeCount;
	const unsigned long* const hit = std::lower_bound(kPrimes, table_end, n);
	if (hit == table_end)
	{
		return kPrimes[kPrimeCount - 1];
	}
	return *hit;
}

namespace hash_bucket
{
	// One node of a bucket's singly linked chain.
	template<class T>
	struct HashNode
	{
		T _data;            // stored element
		HashNode<T>* _next; // next node in the same bucket, nullptr at the tail

		HashNode(const T& data)
			:_data(data)
			, _next(nullptr)
		{}
	};

	// Forward declaration: the iterator stores a pointer to HashTable, which is
	// defined after the iterator.
	template<class K, class T, class KeyOfT, class Hash>
	class HashTable;

	// Forward-only iterator over a HashTable.
	//   Ref / Ptr select the mutable (T&, T*) or read-only (const T&, const T*) flavour.
	// It keeps the current node plus a pointer to the owning table so that it
	// can move on to the next non-empty bucket once the current chain is
	// exhausted. A null _node represents the end position.
	template<class K,class T,class Ref,class Ptr,class KeyOfT,class Hash>
	struct HTIterator
	{
		typedef HashNode<T> Node;
		typedef HashTable<K, T, KeyOfT, Hash> HT;
		typedef HTIterator<K, T, Ref, Ptr, KeyOfT, Hash> Self;

		Node* _node;
		const HT* _ht;

		HTIterator(Node* node,const HT* ht)
			:_node(node)
			,_ht(ht)
		{}

		// Pre-increment. Must not be called on the end position (null _node).
		Self& operator++()
		{
			if (_node->_next)
			{
				// Still inside the current bucket's chain.
				_node = _node->_next;
				return *this;
			}

			// Chain exhausted: recompute the current bucket from the key and
			// scan forward for the next non-empty bucket. size_t keeps the
			// index in the same unsigned type as the bucket count.
			KeyOfT kot;
			Hash hash;
			const size_t bucket_count = _ht->_tables.size();
			size_t hashi = hash(kot(_node->_data)) % bucket_count + 1;
			while (hashi < bucket_count && !_ht->_tables[hashi])
			{
				++hashi;
			}
			_node = hashi < bucket_count ? _ht->_tables[hashi] : nullptr;
			return *this;
		}

		// Post-increment: returns the position held before advancing.
		Self operator++(int)
		{
			Self tmp = *this;
			++(*this);
			return tmp;
		}

		// Access to the stored element; const so it also works on const iterators.
		Ref operator*() const
		{
			return _node->_data;
		}
		Ptr operator->() const
		{
			return &_node->_data;
		}

		// Two iterators are equal when they refer to the same node.
		bool operator!= (const Self& s) const
		{
			return _node != s._node;
		}
		bool operator== (const Self& s) const
		{
			return _node == s._node;
		}
	};

	// Separate-chaining hash table shared by unordered_map and unordered_set.
	//   K      - key type
	//   T      - stored element type (const K for a set, pair<const K, V> for a map)
	//   KeyOfT - functor that extracts the key from a T
	//   Hash   - functor that maps a key to size_t
	template<class K, class T, class KeyOfT, class Hash>
	class HashTable
	{
		// The iterator reads _tables directly. The friend template uses its own
		// parameter names because reusing K, T, ... would shadow the parameters
		// of this class template.
		template<class K1, class T1, class Ref1, class Ptr1, class KeyOfT1, class Hash1>
		friend struct HTIterator;

		typedef HashNode<T> Node;
	public:
		typedef HTIterator<K, T, T&, T*, KeyOfT, Hash> Iterator;
		typedef HTIterator<K, T, const T&, const T*, KeyOfT, Hash> ConstIterator;

		// Creates an empty table whose bucket count is the first table prime >= capacity.
		HashTable(size_t capacity = 0)
			:_n(0)
		{
			_tables.resize(__stl_next_prime(capacity), nullptr);
		}

		// Deep copy: the table owns its nodes, so a member-wise copy would
		// make two tables delete the same nodes.
		HashTable(const HashTable& other)
			:_tables(other._tables.size(), nullptr)
			,_n(0)
		{
			try
			{
				for (size_t i = 0; i < other._tables.size(); ++i)
				{
					// Same bucket count and hash, so bucket i maps to bucket i;
					// append at the tail to keep the source chain order.
					Node** tail = &_tables[i];
					for (Node* cur = other._tables[i]; cur; cur = cur->_next)
					{
						*tail = new Node(cur->_data);
						tail = &(*tail)->_next;
						++_n;
					}
				}
			}
			catch (...)
			{
				// The destructor does not run for a half-built object.
				Clear();
				throw;
			}
		}

		// Copy assignment via copy-and-swap.
		HashTable& operator=(const HashTable& other)
		{
			if (this != &other)
			{
				HashTable tmp(other);
				_tables.swap(tmp._tables);
				std::swap(_n, tmp._n);
			}
			return *this;
		}

		// Frees every node.
		~HashTable()
		{
			Clear();
		}

		// Iterator to the first node of the first non-empty bucket, or End() when empty.
		Iterator Begin()
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				if(_tables[i])
				{
					return Iterator(_tables[i], this);
				}
			}
			return End();
		}

		ConstIterator Begin() const
		{
			for (size_t i = 0; i < _tables.size(); i++)
			{
				if (_tables[i])
				{
					return ConstIterator(_tables[i], this);
				}
			}
			return End();
		}

		// The end position is represented by a null node.
		Iterator End()
		{
			return Iterator(nullptr, this);
		}

		ConstIterator End() const
		{
			return ConstIterator(nullptr, this);
		}

		// Inserts `data` unless an element with the same key already exists.
		// Returns (iterator to the element with that key, true if inserted).
		std::pair<Iterator,bool> Insert(const T& data)
		{
			KeyOfT kot;
			Iterator it = Find(kot(data));
			if (it != End())
				return std::make_pair(it, false);

			// Load factor reached 1: grow to the next prime bucket count.
			if (_n == _tables.size())
			{
				Rehash(__stl_next_prime(_tables.size() + 1));
			}

			// Computed after the possible rehash so it matches the current bucket count.
			const size_t hash0 = BucketOf(kot(data), _tables.size());
			Node* newnode = new Node(data);
			// Push at the head of the bucket.
			newnode->_next = _tables[hash0];
			_tables[hash0] = newnode;
			++_n;
			return std::make_pair(Iterator(newnode, this), true);
		}

		// Returns an iterator to the element with the given key, or End().
		// The iterator carries the table pointer so that it can be advanced.
		Iterator Find(const K& key)
		{
			return Iterator(FindNode(key), this);
		}

		ConstIterator Find(const K& key) const
		{
			return ConstIterator(FindNode(key), this);
		}

		// Removes the element with the given key, if any. Returns an iterator
		// to the element that followed it, or End() when nothing was removed.
		Iterator Erase(const K& key)
		{
			if (Empty())
				return End();

			KeyOfT kot;
			const size_t hash0 = BucketOf(key, _tables.size());
			Node* prev = nullptr;
			for (Node* cur = _tables[hash0]; cur; prev = cur, cur = cur->_next)
			{
				if (!(kot(cur->_data) == key))
					continue;

				// Step past the victim while it is still linked in.
				Iterator next(cur, this);
				++next;

				if (prev == nullptr)
				{
					// Removing the head: the bucket must keep the rest of the chain.
					_tables[hash0] = cur->_next;
				}
				else
				{
					prev->_next = cur->_next;
				}
				delete cur;
				--_n;
				return next;
			}
			return End();
		}

		// Number of stored elements.
		size_t Size() const
		{
			return _n;
		}
		bool Empty() const
		{
			return _n == 0;
		}
	private:
		// Bucket index of `key` for a table with `bucket_count` buckets.
		size_t BucketOf(const K& key, size_t bucket_count) const
		{
			Hash hash;
			return hash(key) % bucket_count;
		}

		// Returns the node holding `key`, or nullptr when it is absent.
		Node* FindNode(const K& key) const
		{
			if (Empty())
				return nullptr;

			KeyOfT kot;
			Node* cur = _tables[BucketOf(key, _tables.size())];
			while (cur && !(kot(cur->_data) == key))
			{
				cur = cur->_next;
			}
			return cur;
		}

		// Deletes every node and leaves all buckets empty.
		void Clear()
		{
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					delete cur;
					cur = next;
				}
				_tables[i] = nullptr;
			}
			_n = 0;
		}

		// Moves all existing nodes into a table of `newsize` buckets.
		void Rehash(size_t newsize)
		{
			// Nothing to do when the bucket count would not change.
			if (newsize == _tables.size())
				return;

			std::vector<Node*> newtables(newsize, nullptr);
			KeyOfT kot;
			for (size_t i = 0; i < _tables.size(); ++i)
			{
				Node* cur = _tables[i];
				while (cur)
				{
					Node* next = cur->_next;
					const size_t hash0 = BucketOf(kot(cur->_data), newsize);
					cur->_next = newtables[hash0];
					newtables[hash0] = cur;
					cur = next;
				}
				// The nodes now belong to the new table; clear the old head so
				// no stale pointer to them remains in the old bucket array.
				_tables[i] = nullptr;
			}
			_tables.swap(newtables);
		}

		std::vector<Node*> _tables; // bucket heads
		size_t _n;                  // number of stored elements
	};
}