Chapter 7
Hashing
7.1 Dictionaries
A dictionary is a collection of elements; each element
has a field called key,and no two elements have the
same key value.
Operations:
? Insert(x),insert an element with a specified key
value x
? Search(k,x):search an element with a specified key
value k,put the element into x
? Delete(k,x):delete an element with a specified key
value k,put the element into x
7.2 Linear List Representation
? A dictionary can be maintained as an
ordered Linear List(e1,e2,……)
? ei are dictionary elements and their keys are
increased from left to right
? We may define two classes Sorted List and
Sorted Chain to facilitate this representation
1,SortedList
Sequential Search(program 2.1)
The array can be ordered or unordered
Time complexity:
ASLsucc=(1+2+……+n)/n
= ((n+1)*n/2)/n=(n+1)/2=O(n)
a0 a1 a2 an-1
a 0 1 2 n-1
2.Binary Search
It is suitable for sorted list
example
-1 0 1 3 4 6 8 10 12
a,0 1 2 3 4 5 6 7 8
low mid1 mid3 mid2 high
Search x=6
Example:
-1 0 1 3 4 6 8 10 12
a,0 1 2 3 4 5 6 7 8
low mid1 mid3 mid2 high
Search x=6
1) a[mid]=x,found
2) a[mid]>x,x is in the left half of the array,search again
3) a[mid]<x,x is in the right half of the array,search
again
Program—binary search
Template<class T>
int SortedList<T>::BinarySearch(const T&x)const
{ int high=n-1,low=0,mid;
while(low<=high)
{ mid=(low+high)/2;
if(a[mid]<x) low=mid+1;
else if(a[mid]>x) high=mid-1;
else return mid;
}return –1;
}
Analysis of binary search time complexity:
? Best case,compare one time
? Worst case,no longer than the height of the
binary search tree
? Average compare times:
2,SortedChain
class SortedChain (program 7-1)
Search,Delete(program 7-2)
Insert (program 7-3)
7.3 Hashing
1.Ideal hashing O(1)
? Another representation of a dictionary is to
use hashing
? Address=hash(key),also called,
name-address function
7.4 Hashing
2.example
7.4 Hashing
3.problems
? Find a proper hash function
? How to solve a collision
? Select a suitable loading factor ?.
?=n/b,n is number of elements in the hash
table,b is the number of buckets in the hash
table
7.4 Hashing
4.hashing function
7.4 Hashing
5.how to solve a collision
1)linear open address
? If hash(k)=d,and the bucket is already
occupied,then we will examine successive
buckets d+1,d+2,……m -1,0,1,2,……d -1,
in the array
? example
7.4 Hashing
? Example,a hash table with 11 buckets,the
divisor D to use is 11.
80 40 65
Ht 0 1 2 3 4 5 6 7 8 9 10
24 80 58 40 65
Ht 0 1 2 3 4 5 6 7 8 9 10
24 80 58 35 40 65
Ht 0 1 2 3 4 5 6 7 8 9 10
7.4 Hashing
? Performance analysis
the adding sequence is 80,40,65,24,58,35
ASLsucc=(1+1+1+1+2+3)/6=1.5
7.4 Hashing
? C++ Implementation
1)Assume that each element to be stored in the
hash table is of type E and has a field key of type
k.
2)the hash table is implemented using two arrays,
ht and empty,
3)empty[i] is true iff ht[i] does not have an element
in it,It is defined for the deletion operation
7.4 Hashing
? Program 7.11
? C++ class definiition for hash tables
template<class E,class K>
class HashTable{
public:
HashTable(int divisor =11);
~HashTable(){delete[]ht;
delete [] empty;}
7.4 Hashing
bool Search(const K&k,E& e)const;
HashTable<E,K>&Insert(const E&e);
private:
int hSearch(const K&k)const;
int D;//hash function divisor
E *ht; //hash table array
bool *empty; //1D array
};
7.4 Hashing
? Program 7.12—constructor for hashtable
template<class E,class K>
HashTable<E,K>::HashTable(int divisor)
{//Constructor.
D=divisor;
//allocate hash table arrays
ht=new E[D];
empty= new bool[D];
7.4 Hashing
//set all buckets to empty
for(int i=0;i<D;i++)
empty[i]=true;
}
7.4 Hashing
? Program 7.13—search function
template<class E,class K>
int HashTable<E,K>::hSearch(const K&k)const
{//search an open addressed table
//return lacation of k if present
//other wise return insert point if there is space
int i=k%D; //home bucket
int j=i; //start at home bucket
7.4 Hashing
do{
if(empty[j] || ht[j]==k) return j;
j=(j+1)%D; //next bucket
} while(j!=i); //returned to home?
return j; //table full;
}
7.4 Hashing
Template<class E,class K>
bool HashTable<E,K>::Search(const K&k,E&e)const
{//put element that matches k in e.
//return false if no match.
int b=hSearch(k);
if(empty[b]||ht[b]!=k)return false;
e=ht[b];
return true;
}
7.4 Hashing
? Program 7.14—insertion into a hash table
template<class E,class K>
HashTable<E,K>& HashTable<E,K>::Insert(const
E& e)
{//hash table insert
K k=e; //extract key
int b=hSearch(k);
//check if insert is to be done
7.4 Hashing
if(empty[b]){empty[b]=false;
ht[b]=e;
return *this;}
//no insert,check if duplicate or full
if(ht[b]==k)throw BadInput();//duplicate
throw NoMem(); //table full
}
7.4 Hashing
2) Hashing with chains
0
0
0
11 33 55 66 0
36 69 0
16 49 82 0
[0]
[1]
[2]
[3]
[4]
[5]
7.4 Hashing
? C++ Implementation of chained hash tables
template<class E,class K>
class ChainHashTable{
public:
ChainHashTable(int divisor=11)
{D=divisor;
ht=new SortedChain<E,K>[D];}
~ChainHashTable(){delete []ht;}
7.4 Hashing
bool Search(const K&k,E&e)const
{return ht[k%D].Search(k,e);}
ChainHashTable<E,K>& Insert(const E& e)
{ht[e%D].DistinctInsert(e);return *this;}
ChainHashTable<E,K>&Delete(const K&k,E&e)
{ht[k%D].Delete(k,e);return *this;}
private:
int D; //divisor
SortedChain<E,K>* ht; //array of chains
};
Hashing
7.1 Dictionaries
A dictionary is a collection of elements; each element
has a field called key,and no two elements have the
same key value.
Operations:
? Insert(x),insert an element with a specified key
value x
? Search(k,x):search an element with a specified key
value k,put the element into x
? Delete(k,x):delete an element with a specified key
value k,put the element into x
7.2 Linear List Representation
? A dictionary can be maintained as an
ordered Linear List(e1,e2,……)
? ei are dictionary elements and their keys are
increased from left to right
? We may define two classes Sorted List and
Sorted Chain to facilitate this representation
1,SortedList
Sequential Search(program 2.1)
The array can be ordered or unordered
Time complexity:
ASLsucc=(1+2+……+n)/n
= ((n+1)*n/2)/n=(n+1)/2=O(n)
a0 a1 a2 an-1
a 0 1 2 n-1
2.Binary Search
It is suitable for sorted list
example
-1 0 1 3 4 6 8 10 12
a,0 1 2 3 4 5 6 7 8
low mid1 mid3 mid2 high
Search x=6
Example:
-1 0 1 3 4 6 8 10 12
a,0 1 2 3 4 5 6 7 8
low mid1 mid3 mid2 high
Search x=6
1) a[mid]=x,found
2) a[mid]>x,x is in the left half of the array,search again
3) a[mid]<x,x is in the right half of the array,search
again
Program—binary search
Template<class T>
int SortedList<T>::BinarySearch(const T&x)const
{ int high=n-1,low=0,mid;
while(low<=high)
{ mid=(low+high)/2;
if(a[mid]<x) low=mid+1;
else if(a[mid]>x) high=mid-1;
else return mid;
}return –1;
}
Analysis of binary search time complexity:
? Best case,compare one time
? Worst case,no longer than the height of the
binary search tree
? Average compare times:
2,SortedChain
class SortedChain (program 7-1)
Search,Delete(program 7-2)
Insert (program 7-3)
7.3 Hashing
1.Ideal hashing O(1)
? Another representation of a dictionary is to
use hashing
? Address=hash(key),also called,
name-address function
7.4 Hashing
2.example
7.4 Hashing
3.problems
? Find a proper hash function
? How to solve a collision
? Select a suitable loading factor ?.
?=n/b,n is number of elements in the hash
table,b is the number of buckets in the hash
table
7.4 Hashing
4.hashing function
7.4 Hashing
5.how to solve a collision
1)linear open address
? If hash(k)=d,and the bucket is already
occupied,then we will examine successive
buckets d+1,d+2,……m -1,0,1,2,……d -1,
in the array
? example
7.4 Hashing
? Example,a hash table with 11 buckets,the
divisor D to use is 11.
80 40 65
Ht 0 1 2 3 4 5 6 7 8 9 10
24 80 58 40 65
Ht 0 1 2 3 4 5 6 7 8 9 10
24 80 58 35 40 65
Ht 0 1 2 3 4 5 6 7 8 9 10
7.4 Hashing
? Performance analysis
the adding sequence is 80,40,65,24,58,35
ASLsucc=(1+1+1+1+2+3)/6=1.5
7.4 Hashing
? C++ Implementation
1)Assume that each element to be stored in the
hash table is of type E and has a field key of type
k.
2)the hash table is implemented using two arrays,
ht and empty,
3)empty[i] is true iff ht[i] does not have an element
in it,It is defined for the deletion operation
7.4 Hashing
? Program 7.11
? C++ class definiition for hash tables
template<class E,class K>
class HashTable{
public:
HashTable(int divisor =11);
~HashTable(){delete[]ht;
delete [] empty;}
7.4 Hashing
bool Search(const K&k,E& e)const;
HashTable<E,K>&Insert(const E&e);
private:
int hSearch(const K&k)const;
int D;//hash function divisor
E *ht; //hash table array
bool *empty; //1D array
};
7.4 Hashing
? Program 7.12—constructor for hashtable
template<class E,class K>
HashTable<E,K>::HashTable(int divisor)
{//Constructor.
D=divisor;
//allocate hash table arrays
ht=new E[D];
empty= new bool[D];
7.4 Hashing
//set all buckets to empty
for(int i=0;i<D;i++)
empty[i]=true;
}
7.4 Hashing
? Program 7.13—search function
template<class E,class K>
int HashTable<E,K>::hSearch(const K&k)const
{//search an open addressed table
//return lacation of k if present
//other wise return insert point if there is space
int i=k%D; //home bucket
int j=i; //start at home bucket
7.4 Hashing
do{
if(empty[j] || ht[j]==k) return j;
j=(j+1)%D; //next bucket
} while(j!=i); //returned to home?
return j; //table full;
}
7.4 Hashing
Template<class E,class K>
bool HashTable<E,K>::Search(const K&k,E&e)const
{//put element that matches k in e.
//return false if no match.
int b=hSearch(k);
if(empty[b]||ht[b]!=k)return false;
e=ht[b];
return true;
}
7.4 Hashing
? Program 7.14—insertion into a hash table
template<class E,class K>
HashTable<E,K>& HashTable<E,K>::Insert(const
E& e)
{//hash table insert
K k=e; //extract key
int b=hSearch(k);
//check if insert is to be done
7.4 Hashing
if(empty[b]){empty[b]=false;
ht[b]=e;
return *this;}
//no insert,check if duplicate or full
if(ht[b]==k)throw BadInput();//duplicate
throw NoMem(); //table full
}
7.4 Hashing
2) Hashing with chains
0
0
0
11 33 55 66 0
36 69 0
16 49 82 0
[0]
[1]
[2]
[3]
[4]
[5]
7.4 Hashing
? C++ Implementation of chained hash tables
template<class E,class K>
class ChainHashTable{
public:
ChainHashTable(int divisor=11)
{D=divisor;
ht=new SortedChain<E,K>[D];}
~ChainHashTable(){delete []ht;}
7.4 Hashing
bool Search(const K&k,E&e)const
{return ht[k%D].Search(k,e);}
ChainHashTable<E,K>& Insert(const E& e)
{ht[e%D].DistinctInsert(e);return *this;}
ChainHashTable<E,K>&Delete(const K&k,E&e)
{ht[k%D].Delete(k,e);return *this;}
private:
int D; //divisor
SortedChain<E,K>* ht; //array of chains
};