Links
Abstract
Bloom filters impress by their sheer elegance and have become a widely and indiscriminately used tool in network applications, although, as we show, their performance can often be far from optimal. Notably in application areas where false negatives are tolerable, other techniques can clearly be better. We show that, at least for a specific area in the parameter space, Bloom filters are significantly outperformed even by a simple scheme. We show that many application areas where Bloom filters are deployed do not require the strong policy of no false negatives and sometimes even prefer false negatives. We analyze, through modelling, how far Bloom filters are from the optimal and then examine application specific issues in a distributed web caching scenario. We hope to open up and seed discussion towards domain-specific alternatives to Bloom filters while perhaps sparking ideas for a general-purpose alternative.
BibTeX (Download)
% Conference paper: Hurley and Waldvogel, "Bloom Filters: One Size Fits All?", IEEE LCN 2007.
% Names are stored in "Last, First" form; case-sensitive words are brace-protected.
% "year" is kept next to "date" so that styles reading only "year" still get a value.
% "pubstate" and "tppubtype" are carried over unchanged from the exported record.
@inproceedings{Hurley2007Bloom,
  author    = {Hurley, Paul and Waldvogel, Marcel},
  title     = {{Bloom} Filters: One Size Fits All?},
  booktitle = {Proceedings of {IEEE} {LCN} 2007},
  pages     = {183--190},
  year      = {2007},
  date      = {2007-06-17},
  issn      = {0742-1303},
  url       = {https://netfuture.ch/wp-content/uploads/2007/waldvogel07bloom.pdf},
  abstract  = {Bloom filters impress by their sheer elegance and have become a widely and indiscriminately used tool in network applications, although, as we show, their performance can often be far from optimal. Notably in application areas where false negatives are tolerable, other techniques can clearly be better. We show that, at least for a specific area in the parameter space, Bloom filters are significantly outperformed even by a simple scheme. We show that many application areas where Bloom filters are deployed do not require the strong policy of no false negatives and sometimes even prefer false negatives. We analyze, through modelling, how far Bloom filters are from the optimal and then examine application specific issues in a distributed web caching scenario. We hope to open up and seed discussion towards domain-specific alternatives to Bloom filters while perhaps sparking ideas for a general-purpose alternative.},
  keywords  = {Bloom Filters, Hash Tables},
  pubstate  = {published},
  tppubtype = {inproceedings},
}