// The Algorithms logo
// The Algorithms
// About | Donate
using System;
using System.Collections;
using System.Collections.Generic;
using System.Linq;
using System.Text;
using System.Text.Json;

namespace DataStructures.Probabilistic
{
    /// <summary>
    /// A Bloom filter: a fixed-size bit array that answers "has this item been inserted?" with possible
    /// false positives but no false negatives. Slots are derived from multiples of the item's
    /// <see cref="object.GetHashCode"/> value, so items with equal hash codes are indistinguishable.
    /// </summary>
    /// <typeparam name="T">Type of the items tracked by the filter.</typeparam>
    public class BloomFilter<T> where T : notnull
    {
        private const int BitsPerByte = 8;
        private readonly byte[] filter;
        private readonly int numHashes;
        private readonly int sizeBits;

        /// <summary>
        /// Initializes a new instance of the <see cref="BloomFilter{T}"/> class. This constructor will create a Bloom Filter
        /// of an optimal size with the optimal number of hashes to minimize the error rate.
        /// </summary>
        /// <param name="expectedNumElements">Expected number of unique elements that could be added to the filter.</param>
        /// <exception cref="ArgumentOutOfRangeException"><paramref name="expectedNumElements"/> is not positive.</exception>
        public BloomFilter(int expectedNumElements)
        {
            if (expectedNumElements <= 0)
            {
                throw new ArgumentOutOfRangeException(
                    nameof(expectedNumElements),
                    expectedNumElements,
                    "The expected number of elements must be positive.");
            }

            // Optimal hash count is k = (m / n) * ln 2. The filter holds 8 bits per expected element,
            // so m / n is always 8 and .693 approximates ln 2.
            numHashes = (int)Math.Ceiling(.693 * BitsPerByte);

            // One byte, i.e. 8 bit slots, per expected element.
            filter = new byte[expectedNumElements];

            // Multiply in 64 bits and clamp: a 32-bit product wraps for large inputs (to 0 at 2^29 elements),
            // which would make every slot computation divide by zero.
            sizeBits = (int)Math.Min((long)expectedNumElements * BitsPerByte, int.MaxValue);
        }

        /// <summary>
        /// Initializes a new instance of the <see cref="BloomFilter{T}"/> class.
        /// This constructor lets you decide how large you want the filter to be as well as allowing you to specify
        /// how many hashes it will use. Only use if you don't care to optimize false positivity.
        /// </summary>
        /// <param name="sizeBits">size in bits you want the filter to be.</param>
        /// <param name="numHashes">number of hash functions to be used.</param>
        /// <exception cref="ArgumentOutOfRangeException">
        /// <paramref name="sizeBits"/> or <paramref name="numHashes"/> is not positive.
        /// </exception>
        public BloomFilter(int sizeBits, int numHashes)
        {
            if (sizeBits <= 0)
            {
                // A zero-sized filter would divide by zero when computing slots.
                throw new ArgumentOutOfRangeException(nameof(sizeBits), sizeBits, "The filter size must be positive.");
            }

            if (numHashes <= 0)
            {
                // Without any hash, Search would report every item as present.
                throw new ArgumentOutOfRangeException(nameof(numHashes), numHashes, "The number of hashes must be positive.");
            }

            // The extra byte covers the remainder bits when sizeBits is not a multiple of 8.
            filter = new byte[sizeBits / BitsPerByte + 1];
            this.numHashes = numHashes;
            this.sizeBits = sizeBits;
        }

        /// <summary>
        /// Inserts an item into the bloom filter.
        /// </summary>
        /// <param name="item">The item being inserted into the Bloom Filter.</param>
        public void Insert(T item)
        {
            foreach (var slot in GetSlots(item))
            {
                // Set the bit for this slot: byte index is slot / 8, bit index within the byte is slot % 8.
                filter[slot / BitsPerByte] |= (byte)(1 << (slot % BitsPerByte));
            }
        }

        /// <summary>
        /// Searches the Bloom Filter to determine if the item exists in the Bloom Filter.
        /// </summary>
        /// <param name="item">The item being searched for in the Bloom Filter.</param>
        /// <returns>
        /// false if the item has definitely not been added; true if every slot for the item is set, which
        /// means the item was added or its slots collide with those of previously added items.
        /// </returns>
        public bool Search(T item)
        {
            foreach (var slot in GetSlots(item))
            {
                var mask = 1 << (slot % BitsPerByte);
                if ((filter[slot / BitsPerByte] & mask) == 0)
                {
                    // A single clear bit proves the item was never inserted.
                    return false;
                }
            }

            return true;
        }

        /// <summary>
        /// Yields the appropriate slots for the given item.
        /// </summary>
        /// <param name="item">The item to determine the slots for.</param>
        /// <returns>The slots of the filter to flip or check, each in the range [0, sizeBits).</returns>
        private IEnumerable<int> GetSlots(T item)
        {
            var hash = item.GetHashCode();
            for (var i = 0; i < numHashes; i++)
            {
                // The 32-bit product is allowed to wrap; only a deterministic spread of values matters.
                var mixed = unchecked((i + 1) * hash);

                // Widen before taking the magnitude: Math.Abs(int) throws OverflowException for int.MinValue,
                // which is reachable both directly (hash == int.MinValue) and through wrap-around.
                yield return (int)(Math.Abs((long)mixed) % sizeBits);
            }
        }
    }
}

// BloomFilter

// S