summaryrefslogtreecommitdiff
path: root/vendor/github.com/bits-and-blooms/bloom
diff options
context:
space:
mode:
authormo khan <mo@mokhan.ca>2025-07-22 17:35:49 -0600
committermo khan <mo@mokhan.ca>2025-07-22 17:35:49 -0600
commit20ef0d92694465ac86b550df139e8366a0a2b4fa (patch)
tree3f14589e1ce6eb9306a3af31c3a1f9e1af5ed637 /vendor/github.com/bits-and-blooms/bloom
parent44e0d272c040cdc53a98b9f1dc58ae7da67752e6 (diff)
feat: connect to spicedb
Diffstat (limited to 'vendor/github.com/bits-and-blooms/bloom')
-rw-r--r--vendor/github.com/bits-and-blooms/bloom/v3/.gitignore27
-rw-r--r--vendor/github.com/bits-and-blooms/bloom/v3/.travis.yml38
-rw-r--r--vendor/github.com/bits-and-blooms/bloom/v3/LICENSE24
-rw-r--r--vendor/github.com/bits-and-blooms/bloom/v3/Makefile197
-rw-r--r--vendor/github.com/bits-and-blooms/bloom/v3/README.md153
-rw-r--r--vendor/github.com/bits-and-blooms/bloom/v3/SECURITY.md5
-rw-r--r--vendor/github.com/bits-and-blooms/bloom/v3/bloom.go453
-rw-r--r--vendor/github.com/bits-and-blooms/bloom/v3/murmur.go289
8 files changed, 1186 insertions, 0 deletions
diff --git a/vendor/github.com/bits-and-blooms/bloom/v3/.gitignore b/vendor/github.com/bits-and-blooms/bloom/v3/.gitignore
new file mode 100644
index 0000000..e719739
--- /dev/null
+++ b/vendor/github.com/bits-and-blooms/bloom/v3/.gitignore
@@ -0,0 +1,27 @@
+# Compiled Object files, Static and Dynamic libs (Shared Objects)
+*.o
+*.a
+*.so
+
+# Folders
+_obj
+_test
+
+# Architecture specific extensions/prefixes
+*.[568vq]
+[568vq].out
+
+*.cgo1.go
+*.cgo2.c
+_cgo_defun.c
+_cgo_gotypes.go
+_cgo_export.*
+
+_testmain.go
+
+*.exe
+*.test
+*.prof
+
+target
+.idea
diff --git a/vendor/github.com/bits-and-blooms/bloom/v3/.travis.yml b/vendor/github.com/bits-and-blooms/bloom/v3/.travis.yml
new file mode 100644
index 0000000..7b8fd30
--- /dev/null
+++ b/vendor/github.com/bits-and-blooms/bloom/v3/.travis.yml
@@ -0,0 +1,38 @@
+language: go
+
+sudo: false
+
+branches:
+ except:
+ - release
+
+branches:
+ only:
+ - master
+ - develop
+ - travis
+
+go:
+ - 1.8
+ - tip
+
+matrix:
+ allow_failures:
+ - go: tip
+
+before_install:
+ - if [ -n "$GH_USER" ]; then git config --global github.user ${GH_USER}; fi;
+ - if [ -n "$GH_TOKEN" ]; then git config --global github.token ${GH_TOKEN}; fi;
+ - go get github.com/mattn/goveralls
+
+before_script:
+ - make deps
+
+script:
+ - make qa
+
+after_failure:
+ - cat ./target/test/report.xml
+
+after_success:
+ - if [ "$TRAVIS_GO_VERSION" = "1.8" ]; then $HOME/gopath/bin/goveralls -covermode=count -coverprofile=target/report/coverage.out -service=travis-ci; fi;
diff --git a/vendor/github.com/bits-and-blooms/bloom/v3/LICENSE b/vendor/github.com/bits-and-blooms/bloom/v3/LICENSE
new file mode 100644
index 0000000..3b9d36a
--- /dev/null
+++ b/vendor/github.com/bits-and-blooms/bloom/v3/LICENSE
@@ -0,0 +1,24 @@
+Copyright (c) 2014 Will Fitzgerald. All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are
+met:
+
+ * Redistributions of source code must retain the above copyright
+notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above
+copyright notice, this list of conditions and the following disclaimer
+in the documentation and/or other materials provided with the
+distribution.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
diff --git a/vendor/github.com/bits-and-blooms/bloom/v3/Makefile b/vendor/github.com/bits-and-blooms/bloom/v3/Makefile
new file mode 100644
index 0000000..0fcbdcb
--- /dev/null
+++ b/vendor/github.com/bits-and-blooms/bloom/v3/Makefile
@@ -0,0 +1,197 @@
+# MAKEFILE
+#
+# @author Nicola Asuni <info@tecnick.com>
+# @link https://github.com/bits-and-blooms/bloom
+# ------------------------------------------------------------------------------
+
+# List special make targets that are not associated with files
+.PHONY: help all test format fmtcheck vet lint coverage cyclo ineffassign misspell structcheck varcheck errcheck gosimple astscan qa deps clean nuke
+
+# Use bash as shell (Note: Ubuntu now uses dash which doesn't support PIPESTATUS).
+SHELL=/bin/bash
+
+# CVS path (path to the parent dir containing the project)
+CVSPATH=github.com/bits-and-blooms
+
+# Project owner
+OWNER=bits-and-blooms
+
+# Project vendor
+VENDOR=bits-and-blooms
+
+# Project name
+PROJECT=bloom
+
+# Project version
+VERSION=$(shell cat VERSION)
+
+# Name of RPM or DEB package
+PKGNAME=${VENDOR}-${PROJECT}
+
+# Current directory
+CURRENTDIR=$(shell pwd)
+
+# GO lang path
+ifneq ($(GOPATH),)
+ ifeq ($(findstring $(GOPATH),$(CURRENTDIR)),)
+ # the defined GOPATH is not valid
+ GOPATH=
+ endif
+endif
+ifeq ($(GOPATH),)
+ # extract the GOPATH
+ GOPATH=$(firstword $(subst /src/, ,$(CURRENTDIR)))
+endif
+
+# --- MAKE TARGETS ---
+
+# Display general help about this command
+help:
+ @echo ""
+ @echo "$(PROJECT) Makefile."
+ @echo "GOPATH=$(GOPATH)"
+ @echo "The following commands are available:"
+ @echo ""
+ @echo " make qa : Run all the tests"
+ @echo " make test : Run the unit tests"
+ @echo ""
+ @echo " make format : Format the source code"
+ @echo " make fmtcheck : Check if the source code has been formatted"
+ @echo " make vet : Check for suspicious constructs"
+ @echo " make lint : Check for style errors"
+ @echo " make coverage : Generate the coverage report"
+ @echo " make cyclo : Generate the cyclomatic complexity report"
+ @echo " make ineffassign : Detect ineffectual assignments"
+ @echo " make misspell : Detect commonly misspelled words in source files"
+ @echo " make structcheck : Find unused struct fields"
+ @echo " make varcheck : Find unused global variables and constants"
+ @echo " make errcheck : Check that error return values are used"
+ @echo " make gosimple : Suggest code simplifications"
+ @echo " make astscan : GO AST scanner"
+ @echo ""
+ @echo " make docs : Generate source code documentation"
+ @echo ""
+ @echo " make deps : Get the dependencies"
+ @echo " make clean : Remove any build artifact"
+ @echo " make nuke : Deletes any intermediate file"
+ @echo ""
+
+# Alias for help target
+all: help
+
+# Run the unit tests
+test:
+ @mkdir -p target/test
+ @mkdir -p target/report
+ GOPATH=$(GOPATH) \
+ go test \
+ -covermode=atomic \
+ -bench=. \
+ -race \
+ -cpuprofile=target/report/cpu.out \
+ -memprofile=target/report/mem.out \
+ -mutexprofile=target/report/mutex.out \
+ -coverprofile=target/report/coverage.out \
+ -v ./... | \
+ tee >(PATH=$(GOPATH)/bin:$(PATH) go-junit-report > target/test/report.xml); \
+ test $${PIPESTATUS[0]} -eq 0
+
+# Format the source code
+format:
+ @find . -type f -name "*.go" -exec gofmt -s -w {} \;
+
+# Check if the source code has been formatted
+fmtcheck:
+ @mkdir -p target
+ @find . -type f -name "*.go" -exec gofmt -s -d {} \; | tee target/format.diff
+ @test ! -s target/format.diff || { echo "ERROR: the source code has not been formatted - please use 'make format' or 'gofmt'"; exit 1; }
+
+# Check for syntax errors
+vet:
+ GOPATH=$(GOPATH) go vet .
+
+# Check for style errors
+lint:
+ GOPATH=$(GOPATH) PATH=$(GOPATH)/bin:$(PATH) golint .
+
+# Generate the coverage report
+coverage:
+ @mkdir -p target/report
+ GOPATH=$(GOPATH) \
+ go tool cover -html=target/report/coverage.out -o target/report/coverage.html
+
+# Report cyclomatic complexity
+cyclo:
+ @mkdir -p target/report
+ GOPATH=$(GOPATH) gocyclo -avg ./ | tee target/report/cyclo.txt ; test $${PIPESTATUS[0]} -eq 0
+
+# Detect ineffectual assignments
+ineffassign:
+ @mkdir -p target/report
+ GOPATH=$(GOPATH) ineffassign ./ | tee target/report/ineffassign.txt ; test $${PIPESTATUS[0]} -eq 0
+
+# Detect commonly misspelled words in source files
+misspell:
+ @mkdir -p target/report
+ GOPATH=$(GOPATH) misspell -error ./ | tee target/report/misspell.txt ; test $${PIPESTATUS[0]} -eq 0
+
+# Find unused struct fields
+structcheck:
+ @mkdir -p target/report
+ GOPATH=$(GOPATH) structcheck -a ./ | tee target/report/structcheck.txt
+
+# Find unused global variables and constants
+varcheck:
+ @mkdir -p target/report
+ GOPATH=$(GOPATH) varcheck -e ./ | tee target/report/varcheck.txt
+
+# Check that error return values are used
+errcheck:
+ @mkdir -p target/report
+ GOPATH=$(GOPATH) errcheck ./ | tee target/report/errcheck.txt
+
+# Suggest code simplifications
+gosimple:
+ @mkdir -p target/report
+ GOPATH=$(GOPATH) gosimple ./ | tee target/report/gosimple.txt
+
+# AST scanner
+astscan:
+ @mkdir -p target/report
+ GOPATH=$(GOPATH) gas .//*.go | tee target/report/astscan.txt ; test $${PIPESTATUS[0]} -eq 0
+
+# Generate source docs
+docs:
+ @mkdir -p target/docs
+ nohup sh -c 'GOPATH=$(GOPATH) godoc -http=127.0.0.1:6060' > target/godoc_server.log 2>&1 &
+ wget --directory-prefix=target/docs/ --execute robots=off --retry-connrefused --recursive --no-parent --adjust-extension --page-requisites --convert-links http://127.0.0.1:6060/pkg/github.com/${VENDOR}/${PROJECT}/ ; kill -9 `lsof -ti :6060`
+ @echo '<html><head><meta http-equiv="refresh" content="0;./127.0.0.1:6060/pkg/'${CVSPATH}'/'${PROJECT}'/index.html"/></head><a href="./127.0.0.1:6060/pkg/'${CVSPATH}'/'${PROJECT}'/index.html">'${PKGNAME}' Documentation ...</a></html>' > target/docs/index.html
+
+# Alias to run all quality-assurance checks
+qa: fmtcheck test vet lint coverage cyclo ineffassign misspell structcheck varcheck errcheck gosimple astscan
+
+# --- INSTALL ---
+
+# Get the dependencies
+deps:
+ GOPATH=$(GOPATH) go get ./...
+ GOPATH=$(GOPATH) go get github.com/golang/lint/golint
+ GOPATH=$(GOPATH) go get github.com/jstemmer/go-junit-report
+ GOPATH=$(GOPATH) go get github.com/axw/gocov/gocov
+ GOPATH=$(GOPATH) go get github.com/fzipp/gocyclo
+ GOPATH=$(GOPATH) go get github.com/gordonklaus/ineffassign
+ GOPATH=$(GOPATH) go get github.com/client9/misspell/cmd/misspell
+ GOPATH=$(GOPATH) go get github.com/opennota/check/cmd/structcheck
+ GOPATH=$(GOPATH) go get github.com/opennota/check/cmd/varcheck
+ GOPATH=$(GOPATH) go get github.com/kisielk/errcheck
+ GOPATH=$(GOPATH) go get honnef.co/go/tools/cmd/gosimple
+ GOPATH=$(GOPATH) go get github.com/securego/gosec
+
+# Remove any build artifact
+clean:
+ GOPATH=$(GOPATH) go clean ./...
+
+# Deletes any intermediate file
+nuke:
+ rm -rf ./target
+ GOPATH=$(GOPATH) go clean -i ./...
diff --git a/vendor/github.com/bits-and-blooms/bloom/v3/README.md b/vendor/github.com/bits-and-blooms/bloom/v3/README.md
new file mode 100644
index 0000000..dc472ef
--- /dev/null
+++ b/vendor/github.com/bits-and-blooms/bloom/v3/README.md
@@ -0,0 +1,153 @@
+Bloom filters
+-------------
+[![Test](https://github.com/bits-and-blooms/bloom/actions/workflows/test.yml/badge.svg)](https://github.com/bits-and-blooms/bloom/actions/workflows/test.yml)
+[![Go Report Card](https://goreportcard.com/badge/github.com/bits-and-blooms/bloom)](https://goreportcard.com/report/github.com/bits-and-blooms/bloom)
+[![Go Reference](https://pkg.go.dev/badge/github.com/bits-and-blooms/bloom.svg)](https://pkg.go.dev/github.com/bits-and-blooms/bloom/v3)
+
+This library is used by popular systems such as [Milvus](https://github.com/milvus-io/milvus) and [beego](https://github.com/beego/Beego).
+
+A Bloom filter is a concise/compressed representation of a set, where the main
+requirement is to make membership queries; _i.e._, whether an item is a
+member of a set. A Bloom filter will always correctly report the presence
+of an element in the set when the element is indeed present. A Bloom filter
+can use much less storage than the original set, but it allows for some 'false positives':
+it may sometimes report that an element is in the set whereas it is not.
+
+When you construct, you need to know how many elements you have (the desired capacity), and what is the desired false positive rate you are willing to tolerate. A common false-positive rate is 1%. The
+lower the false-positive rate, the more memory you are going to require. Similarly, the higher the
+capacity, the more memory you will use.
+You may construct the Bloom filter capable of receiving 1 million elements with a false-positive
+rate of 1% in the following manner.
+
+```Go
+ filter := bloom.NewWithEstimates(1000000, 0.01)
+```
+
+You should call `NewWithEstimates` conservatively: if you specify a number of elements that it is
+too small, the false-positive bound might be exceeded. A Bloom filter is not a dynamic data structure:
+you must know ahead of time what your desired capacity is.
+
+Our implementation accepts keys for setting and testing as `[]byte`. Thus, to
+add a string item, `"Love"`:
+
+```Go
+ filter.Add([]byte("Love"))
+```
+
+Similarly, to test if `"Love"` is in bloom:
+
+```Go
+ if filter.Test([]byte("Love"))
+```
+
+For numerical data, we recommend that you look into the encoding/binary library. But, for example, to add a `uint32` to the filter:
+
+```Go
+ i := uint32(100)
+ n1 := make([]byte, 4)
+ binary.BigEndian.PutUint32(n1, i)
+ filter.Add(n1)
+```
+
+Godoc documentation: https://pkg.go.dev/github.com/bits-and-blooms/bloom/v3
+
+
+## Installation
+
+```bash
+go get -u github.com/bits-and-blooms/bloom/v3
+```
+
+## Verifying the False Positive Rate
+
+
+Sometimes, the actual false positive rate may differ (slightly) from the
+theoretical false positive rate. We have a function to estimate the false positive rate of a
+Bloom filter with _m_ bits and _k_ hashing functions for a set of size _n_:
+
+```Go
+ if bloom.EstimateFalsePositiveRate(20*n, 5, n) > 0.001 ...
+```
+
+You can use it to validate the computed m, k parameters:
+
+```Go
+ m, k := bloom.EstimateParameters(n, fp)
+ ActualfpRate := bloom.EstimateFalsePositiveRate(m, k, n)
+```
+
+or
+
+```Go
+ f := bloom.NewWithEstimates(n, fp)
+ ActualfpRate := bloom.EstimateFalsePositiveRate(f.m, f.k, n)
+```
+
+You would expect `ActualfpRate` to be close to the desired false-positive rate `fp` in these cases.
+
+The `EstimateFalsePositiveRate` function creates a temporary Bloom filter. It is
+also relatively expensive and only meant for validation.
+
+## Serialization
+
+You can read and write the Bloom filters as follows:
+
+
+```Go
+ f := New(1000, 4)
+ var buf bytes.Buffer
+ bytesWritten, err := f.WriteTo(&buf)
+ if err != nil {
+ t.Fatal(err.Error())
+ }
+ var g BloomFilter
+ bytesRead, err := g.ReadFrom(&buf)
+ if err != nil {
+ t.Fatal(err.Error())
+ }
+ if bytesRead != bytesWritten {
+ t.Errorf("read unexpected number of bytes %d != %d", bytesRead, bytesWritten)
+ }
+```
+
+*Performance tip*:
+When reading and writing to a file or a network connection, you may get better performance by
+wrapping your streams with `bufio` instances.
+
+E.g.,
+```Go
+ f, err := os.Create("myfile")
+ w := bufio.NewWriter(f)
+```
+```Go
+ f, err := os.Open("myfile")
+ r := bufio.NewReader(f)
+```
+
+## Contributing
+
+If you wish to contribute to this project, please branch and issue a pull request against master ("[GitHub Flow](https://guides.github.com/introduction/flow/)")
+
+This project includes a Makefile that allows you to test and build the project with simple commands.
+To see all available options:
+```bash
+make help
+```
+
+## Running all tests
+
+Before committing the code, please check if it passes all tests using (note: this will install some dependencies):
+```bash
+make deps
+make qa
+```
+
+## Design
+
+A Bloom filter has two parameters: _m_, the number of bits used in storage, and _k_, the number of hashing functions on elements of the set. (The actual hashing functions are important, too, but this is not a parameter for this implementation). A Bloom filter is backed by a [BitSet](https://github.com/bits-and-blooms/bitset); a key is represented in the filter by setting the bits at each value of the hashing functions (modulo _m_). Set membership is done by _testing_ whether the bits at each value of the hashing functions (again, modulo _m_) are set. If so, the item is in the set. If the item is actually in the set, a Bloom filter will never fail (the true positive rate is 1.0); but it is susceptible to false positives. The art is to choose _k_ and _m_ correctly.
+
+In this implementation, the hashing functions used is [murmurhash](github.com/twmb/murmur3), a non-cryptographic hashing function.
+
+
+Given the particular hashing scheme, it's best to be empirical about this. Note
+that estimating the FP rate will clear the Bloom filter.
diff --git a/vendor/github.com/bits-and-blooms/bloom/v3/SECURITY.md b/vendor/github.com/bits-and-blooms/bloom/v3/SECURITY.md
new file mode 100644
index 0000000..f888420
--- /dev/null
+++ b/vendor/github.com/bits-and-blooms/bloom/v3/SECURITY.md
@@ -0,0 +1,5 @@
+# Security Policy
+
+## Reporting a Vulnerability
+
+You can report privately a vulnerability by email at daniel@lemire.me (current maintainer).
diff --git a/vendor/github.com/bits-and-blooms/bloom/v3/bloom.go b/vendor/github.com/bits-and-blooms/bloom/v3/bloom.go
new file mode 100644
index 0000000..89dbe24
--- /dev/null
+++ b/vendor/github.com/bits-and-blooms/bloom/v3/bloom.go
@@ -0,0 +1,453 @@
+/*
+Package bloom provides data structures and methods for creating Bloom filters.
+
+A Bloom filter is a representation of a set of _n_ items, where the main
+requirement is to make membership queries; _i.e._, whether an item is a
+member of a set.
+
+A Bloom filter has two parameters: _m_, a maximum size (typically a reasonably large
+multiple of the cardinality of the set to represent) and _k_, the number of hashing
+functions on elements of the set. (The actual hashing functions are important, too,
+but this is not a parameter for this implementation). A Bloom filter is backed by
+a BitSet; a key is represented in the filter by setting the bits at each value of the
+hashing functions (modulo _m_). Set membership is done by _testing_ whether the
+bits at each value of the hashing functions (again, modulo _m_) are set. If so,
+the item is in the set. If the item is actually in the set, a Bloom filter will
+never fail (the true positive rate is 1.0); but it is susceptible to false
+positives. The art is to choose _k_ and _m_ correctly.
+
+In this implementation, the hashing functions used is murmurhash,
+a non-cryptographic hashing function.
+
+This implementation accepts keys for setting as testing as []byte. Thus, to
+add a string item, "Love":
+
+ uint n = 1000
+ filter := bloom.New(20*n, 5) // load of 20, 5 keys
+ filter.Add([]byte("Love"))
+
+Similarly, to test if "Love" is in bloom:
+
+ if filter.Test([]byte("Love"))
+
+For numeric data, I recommend that you look into the binary/encoding library. But,
+for example, to add a uint32 to the filter:
+
+ i := uint32(100)
+ n1 := make([]byte,4)
+ binary.BigEndian.PutUint32(n1,i)
+ f.Add(n1)
+
+Finally, there is a method to estimate the false positive rate of a
+Bloom filter with _m_ bits and _k_ hashing functions for a set of size _n_:
+
+ if bloom.EstimateFalsePositiveRate(20*n, 5, n) > 0.001 ...
+
+You can use it to validate the computed m, k parameters:
+
+ m, k := bloom.EstimateParameters(n, fp)
+ ActualfpRate := bloom.EstimateFalsePositiveRate(m, k, n)
+
+or
+
+ f := bloom.NewWithEstimates(n, fp)
+ ActualfpRate := bloom.EstimateFalsePositiveRate(f.m, f.k, n)
+
+You would expect ActualfpRate to be close to the desired fp in these cases.
+
+The EstimateFalsePositiveRate function creates a temporary Bloom filter. It is
+also relatively expensive and only meant for validation.
+*/
+package bloom
+
+import (
+ "bytes"
+ "encoding/binary"
+ "encoding/json"
+ "fmt"
+ "io"
+ "math"
+
+ "github.com/bits-and-blooms/bitset"
+)
+
+// A BloomFilter is a representation of a set of _n_ items, where the main
+// requirement is to make membership queries; _i.e._, whether an item is a
+// member of a set.
+type BloomFilter struct {
+ m uint
+ k uint
+ b *bitset.BitSet
+}
+
+func max(x, y uint) uint {
+ if x > y {
+ return x
+ }
+ return y
+}
+
+// New creates a new Bloom filter with _m_ bits and _k_ hashing functions
+// We force _m_ and _k_ to be at least one to avoid panics.
+func New(m uint, k uint) *BloomFilter {
+ return &BloomFilter{max(1, m), max(1, k), bitset.New(m)}
+}
+
+// From creates a new Bloom filter with len(_data_) * 64 bits and _k_ hashing
+// functions. The data slice is not going to be reset.
+func From(data []uint64, k uint) *BloomFilter {
+ m := uint(len(data) * 64)
+ return FromWithM(data, m, k)
+}
+
+// FromWithM creates a new Bloom filter with _m_ length, _k_ hashing functions.
+// The data slice is not going to be reset.
+func FromWithM(data []uint64, m, k uint) *BloomFilter {
+ return &BloomFilter{m, k, bitset.From(data)}
+}
+
+// baseHashes returns the four hash values of data that are used to create k
+// hashes
+func baseHashes(data []byte) [4]uint64 {
+ var d digest128 // murmur hashing
+ hash1, hash2, hash3, hash4 := d.sum256(data)
+ return [4]uint64{
+ hash1, hash2, hash3, hash4,
+ }
+}
+
+// location returns the ith hashed location using the four base hash values
+func location(h [4]uint64, i uint) uint64 {
+ ii := uint64(i)
+ return h[ii%2] + ii*h[2+(((ii+(ii%2))%4)/2)]
+}
+
+// location returns the ith hashed location using the four base hash values
+func (f *BloomFilter) location(h [4]uint64, i uint) uint {
+ return uint(location(h, i) % uint64(f.m))
+}
+
+// EstimateParameters estimates requirements for m and k.
+// Based on https://bitbucket.org/ww/bloom/src/829aa19d01d9/bloom.go
+// used with permission.
+func EstimateParameters(n uint, p float64) (m uint, k uint) {
+ m = uint(math.Ceil(-1 * float64(n) * math.Log(p) / math.Pow(math.Log(2), 2)))
+ k = uint(math.Ceil(math.Log(2) * float64(m) / float64(n)))
+ return
+}
+
+// NewWithEstimates creates a new Bloom filter for about n items with fp
+// false positive rate
+func NewWithEstimates(n uint, fp float64) *BloomFilter {
+ m, k := EstimateParameters(n, fp)
+ return New(m, k)
+}
+
+// Cap returns the capacity, _m_, of a Bloom filter
+func (f *BloomFilter) Cap() uint {
+ return f.m
+}
+
+// K returns the number of hash functions used in the BloomFilter
+func (f *BloomFilter) K() uint {
+ return f.k
+}
+
+// BitSet returns the underlying bitset for this filter.
+func (f *BloomFilter) BitSet() *bitset.BitSet {
+ return f.b
+}
+
+// Add data to the Bloom Filter. Returns the filter (allows chaining)
+func (f *BloomFilter) Add(data []byte) *BloomFilter {
+ h := baseHashes(data)
+ for i := uint(0); i < f.k; i++ {
+ f.b.Set(f.location(h, i))
+ }
+ return f
+}
+
+// Merge the data from two Bloom Filters.
+func (f *BloomFilter) Merge(g *BloomFilter) error {
+ // Make sure the m's and k's are the same, otherwise merging has no real use.
+ if f.m != g.m {
+ return fmt.Errorf("m's don't match: %d != %d", f.m, g.m)
+ }
+
+ if f.k != g.k {
+ return fmt.Errorf("k's don't match: %d != %d", f.m, g.m)
+ }
+
+ f.b.InPlaceUnion(g.b)
+ return nil
+}
+
+// Copy creates a copy of a Bloom filter.
+func (f *BloomFilter) Copy() *BloomFilter {
+ fc := New(f.m, f.k)
+ fc.Merge(f) // #nosec
+ return fc
+}
+
+// AddString to the Bloom Filter. Returns the filter (allows chaining)
+func (f *BloomFilter) AddString(data string) *BloomFilter {
+ return f.Add([]byte(data))
+}
+
+// Test returns true if the data is in the BloomFilter, false otherwise.
+// If true, the result might be a false positive. If false, the data
+// is definitely not in the set.
+func (f *BloomFilter) Test(data []byte) bool {
+ h := baseHashes(data)
+ for i := uint(0); i < f.k; i++ {
+ if !f.b.Test(f.location(h, i)) {
+ return false
+ }
+ }
+ return true
+}
+
+// TestString returns true if the string is in the BloomFilter, false otherwise.
+// If true, the result might be a false positive. If false, the data
+// is definitely not in the set.
+func (f *BloomFilter) TestString(data string) bool {
+ return f.Test([]byte(data))
+}
+
+// TestLocations returns true if all locations are set in the BloomFilter, false
+// otherwise.
+func (f *BloomFilter) TestLocations(locs []uint64) bool {
+ for i := 0; i < len(locs); i++ {
+ if !f.b.Test(uint(locs[i] % uint64(f.m))) {
+ return false
+ }
+ }
+ return true
+}
+
+// TestAndAdd is equivalent to calling Test(data) then Add(data).
+// The filter is written to unconditionnally: even if the element is present,
+// the corresponding bits are still set. See also TestOrAdd.
+// Returns the result of Test.
+func (f *BloomFilter) TestAndAdd(data []byte) bool {
+ present := true
+ h := baseHashes(data)
+ for i := uint(0); i < f.k; i++ {
+ l := f.location(h, i)
+ if !f.b.Test(l) {
+ present = false
+ }
+ f.b.Set(l)
+ }
+ return present
+}
+
+// TestAndAddString is the equivalent to calling Test(string) then Add(string).
+// The filter is written to unconditionnally: even if the string is present,
+// the corresponding bits are still set. See also TestOrAdd.
+// Returns the result of Test.
+func (f *BloomFilter) TestAndAddString(data string) bool {
+ return f.TestAndAdd([]byte(data))
+}
+
+// TestOrAdd is equivalent to calling Test(data) then if not present Add(data).
+// If the element is already in the filter, then the filter is unchanged.
+// Returns the result of Test.
+func (f *BloomFilter) TestOrAdd(data []byte) bool {
+ present := true
+ h := baseHashes(data)
+ for i := uint(0); i < f.k; i++ {
+ l := f.location(h, i)
+ if !f.b.Test(l) {
+ present = false
+ f.b.Set(l)
+ }
+ }
+ return present
+}
+
+// TestOrAddString is the equivalent to calling Test(string) then if not present Add(string).
+// If the string is already in the filter, then the filter is unchanged.
+// Returns the result of Test.
+func (f *BloomFilter) TestOrAddString(data string) bool {
+ return f.TestOrAdd([]byte(data))
+}
+
+// ClearAll clears all the data in a Bloom filter, removing all keys
+func (f *BloomFilter) ClearAll() *BloomFilter {
+ f.b.ClearAll()
+ return f
+}
+
+// EstimateFalsePositiveRate returns, for a BloomFilter of m bits
+// and k hash functions, an estimation of the false positive rate when
+//
+// storing n entries. This is an empirical, relatively slow
+//
+// test using integers as keys.
+// This function is useful to validate the implementation.
+func EstimateFalsePositiveRate(m, k, n uint) (fpRate float64) {
+ rounds := uint32(100000)
+ // We construct a new filter.
+ f := New(m, k)
+ n1 := make([]byte, 4)
+ // We populate the filter with n values.
+ for i := uint32(0); i < uint32(n); i++ {
+ binary.BigEndian.PutUint32(n1, i)
+ f.Add(n1)
+ }
+ fp := 0
+ // test for number of rounds
+ for i := uint32(0); i < rounds; i++ {
+ binary.BigEndian.PutUint32(n1, i+uint32(n)+1)
+ if f.Test(n1) {
+ fp++
+ }
+ }
+ fpRate = float64(fp) / (float64(rounds))
+ return
+}
+
+// Approximating the number of items
+// https://en.wikipedia.org/wiki/Bloom_filter#Approximating_the_number_of_items_in_a_Bloom_filter
+func (f *BloomFilter) ApproximatedSize() uint32 {
+ x := float64(f.b.Count())
+ m := float64(f.Cap())
+ k := float64(f.K())
+ size := -1 * m / k * math.Log(1-x/m) / math.Log(math.E)
+ return uint32(math.Floor(size + 0.5)) // round
+}
+
+// bloomFilterJSON is an unexported type for marshaling/unmarshaling BloomFilter struct.
+type bloomFilterJSON struct {
+ M uint `json:"m"`
+ K uint `json:"k"`
+ B *bitset.BitSet `json:"b"`
+}
+
+// MarshalJSON implements json.Marshaler interface.
+func (f BloomFilter) MarshalJSON() ([]byte, error) {
+ return json.Marshal(bloomFilterJSON{f.m, f.k, f.b})
+}
+
+// UnmarshalJSON implements json.Unmarshaler interface.
+func (f *BloomFilter) UnmarshalJSON(data []byte) error {
+ var j bloomFilterJSON
+ err := json.Unmarshal(data, &j)
+ if err != nil {
+ return err
+ }
+ f.m = j.M
+ f.k = j.K
+ f.b = j.B
+ return nil
+}
+
+// WriteTo writes a binary representation of the BloomFilter to an i/o stream.
+// It returns the number of bytes written.
+//
+// Performance: if this function is used to write to a disk or network
+// connection, it might be beneficial to wrap the stream in a bufio.Writer.
+// E.g.,
+//
+// f, err := os.Create("myfile")
+// w := bufio.NewWriter(f)
+func (f *BloomFilter) WriteTo(stream io.Writer) (int64, error) {
+ err := binary.Write(stream, binary.BigEndian, uint64(f.m))
+ if err != nil {
+ return 0, err
+ }
+ err = binary.Write(stream, binary.BigEndian, uint64(f.k))
+ if err != nil {
+ return 0, err
+ }
+ numBytes, err := f.b.WriteTo(stream)
+ return numBytes + int64(2*binary.Size(uint64(0))), err
+}
+
+// ReadFrom reads a binary representation of the BloomFilter (such as might
+// have been written by WriteTo()) from an i/o stream. It returns the number
+// of bytes read.
+//
+// Performance: if this function is used to read from a disk or network
+// connection, it might be beneficial to wrap the stream in a bufio.Reader.
+// E.g.,
+//
+// f, err := os.Open("myfile")
+// r := bufio.NewReader(f)
+func (f *BloomFilter) ReadFrom(stream io.Reader) (int64, error) {
+ var m, k uint64
+ err := binary.Read(stream, binary.BigEndian, &m)
+ if err != nil {
+ return 0, err
+ }
+ err = binary.Read(stream, binary.BigEndian, &k)
+ if err != nil {
+ return 0, err
+ }
+ b := &bitset.BitSet{}
+ numBytes, err := b.ReadFrom(stream)
+ if err != nil {
+ return 0, err
+ }
+ f.m = uint(m)
+ f.k = uint(k)
+ f.b = b
+ return numBytes + int64(2*binary.Size(uint64(0))), nil
+}
+
+// GobEncode implements gob.GobEncoder interface.
+func (f *BloomFilter) GobEncode() ([]byte, error) {
+ var buf bytes.Buffer
+ _, err := f.WriteTo(&buf)
+ if err != nil {
+ return nil, err
+ }
+
+ return buf.Bytes(), nil
+}
+
+// GobDecode implements gob.GobDecoder interface.
+func (f *BloomFilter) GobDecode(data []byte) error {
+ buf := bytes.NewBuffer(data)
+ _, err := f.ReadFrom(buf)
+
+ return err
+}
+
+// MarshalBinary implements binary.BinaryMarshaler interface.
+func (f *BloomFilter) MarshalBinary() ([]byte, error) {
+ var buf bytes.Buffer
+ _, err := f.WriteTo(&buf)
+ if err != nil {
+ return nil, err
+ }
+
+ return buf.Bytes(), nil
+}
+
+// UnmarshalBinary implements binary.BinaryUnmarshaler interface.
+func (f *BloomFilter) UnmarshalBinary(data []byte) error {
+ buf := bytes.NewBuffer(data)
+ _, err := f.ReadFrom(buf)
+
+ return err
+}
+
+// Equal tests for the equality of two Bloom filters
+func (f *BloomFilter) Equal(g *BloomFilter) bool {
+ return f.m == g.m && f.k == g.k && f.b.Equal(g.b)
+}
+
+// Locations returns a list of hash locations representing a data item.
+func Locations(data []byte, k uint) []uint64 {
+ locs := make([]uint64, k)
+
+ // calculate locations
+ h := baseHashes(data)
+ for i := uint(0); i < k; i++ {
+ locs[i] = location(h, i)
+ }
+
+ return locs
+}
diff --git a/vendor/github.com/bits-and-blooms/bloom/v3/murmur.go b/vendor/github.com/bits-and-blooms/bloom/v3/murmur.go
new file mode 100644
index 0000000..c93b1ba
--- /dev/null
+++ b/vendor/github.com/bits-and-blooms/bloom/v3/murmur.go
@@ -0,0 +1,289 @@
+/*
+The bloom library relied on the excellent murmur library
+by Sébastien Paolacci. Unfortunately, it involved some heap
+allocation. We want to avoid any heap allocation whatsoever
+in the hashing process. To preserve backward compatibility, we roll
+our own hashing functions. They are designed to be strictly equivalent
+to Paolacci's implementation.
+
+License on original code:
+
+
+Copyright 2013, Sébastien Paolacci.
+All rights reserved.
+
+Redistribution and use in source and binary forms, with or without
+modification, are permitted provided that the following conditions are met:
+ * Redistributions of source code must retain the above copyright
+ notice, this list of conditions and the following disclaimer.
+ * Redistributions in binary form must reproduce the above copyright
+ notice, this list of conditions and the following disclaimer in the
+ documentation and/or other materials provided with the distribution.
+ * Neither the name of the library nor the
+ names of its contributors may be used to endorse or promote products
+ derived from this software without specific prior written permission.
+
+THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
+ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
+WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
+DISCLAIMED. IN NO EVENT SHALL <COPYRIGHT HOLDER> BE LIABLE FOR ANY
+DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
+(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
+LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND
+ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+*/
+
+package bloom
+
+import (
+ "encoding/binary"
+ "math/bits"
+ "unsafe"
+)
+
+const (
+ c1_128 = 0x87c37b91114253d5
+ c2_128 = 0x4cf5ad432745937f
+ block_size = 16
+)
+
+// digest128 represents a partial evaluation of a 128 bites hash.
+type digest128 struct {
+ h1 uint64 // Unfinalized running hash part 1.
+ h2 uint64 // Unfinalized running hash part 2.
+}
+
+// bmix will hash blocks (16 bytes)
+func (d *digest128) bmix(p []byte) {
+ nblocks := len(p) / block_size
+ for i := 0; i < nblocks; i++ {
+ b := (*[16]byte)(unsafe.Pointer(&p[i*block_size]))
+ k1, k2 := binary.LittleEndian.Uint64(b[:8]), binary.LittleEndian.Uint64(b[8:])
+ d.bmix_words(k1, k2)
+ }
+}
+
+// bmix_words will hash two 64-bit words (16 bytes)
+func (d *digest128) bmix_words(k1, k2 uint64) {
+ h1, h2 := d.h1, d.h2
+
+ k1 *= c1_128
+ k1 = bits.RotateLeft64(k1, 31)
+ k1 *= c2_128
+ h1 ^= k1
+
+ h1 = bits.RotateLeft64(h1, 27)
+ h1 += h2
+ h1 = h1*5 + 0x52dce729
+
+ k2 *= c2_128
+ k2 = bits.RotateLeft64(k2, 33)
+ k2 *= c1_128
+ h2 ^= k2
+
+ h2 = bits.RotateLeft64(h2, 31)
+ h2 += h1
+ h2 = h2*5 + 0x38495ab5
+ d.h1, d.h2 = h1, h2
+}
+
+// sum128 computers two 64-bit hash value. It is assumed that
+// bmix was first called on the data to process complete blocks
+// of 16 bytes. The 'tail' is a slice representing the 'tail' (leftover
+// elements, fewer than 16). If pad_tail is true, we make it seem like
+// there is an extra element with value 1 appended to the tail.
+// The length parameter represents the full length of the data (including
+// the blocks of 16 bytes, and, if pad_tail is true, an extra byte).
+func (d *digest128) sum128(pad_tail bool, length uint, tail []byte) (h1, h2 uint64) {
+ h1, h2 = d.h1, d.h2
+
+ var k1, k2 uint64
+ if pad_tail {
+ switch (len(tail) + 1) & 15 {
+ case 15:
+ k2 ^= uint64(1) << 48
+ break
+ case 14:
+ k2 ^= uint64(1) << 40
+ break
+ case 13:
+ k2 ^= uint64(1) << 32
+ break
+ case 12:
+ k2 ^= uint64(1) << 24
+ break
+ case 11:
+ k2 ^= uint64(1) << 16
+ break
+ case 10:
+ k2 ^= uint64(1) << 8
+ break
+ case 9:
+ k2 ^= uint64(1) << 0
+
+ k2 *= c2_128
+ k2 = bits.RotateLeft64(k2, 33)
+ k2 *= c1_128
+ h2 ^= k2
+
+ break
+
+ case 8:
+ k1 ^= uint64(1) << 56
+ break
+ case 7:
+ k1 ^= uint64(1) << 48
+ break
+ case 6:
+ k1 ^= uint64(1) << 40
+ break
+ case 5:
+ k1 ^= uint64(1) << 32
+ break
+ case 4:
+ k1 ^= uint64(1) << 24
+ break
+ case 3:
+ k1 ^= uint64(1) << 16
+ break
+ case 2:
+ k1 ^= uint64(1) << 8
+ break
+ case 1:
+ k1 ^= uint64(1) << 0
+ k1 *= c1_128
+ k1 = bits.RotateLeft64(k1, 31)
+ k1 *= c2_128
+ h1 ^= k1
+ }
+
+ }
+ switch len(tail) & 15 {
+ case 15:
+ k2 ^= uint64(tail[14]) << 48
+ fallthrough
+ case 14:
+ k2 ^= uint64(tail[13]) << 40
+ fallthrough
+ case 13:
+ k2 ^= uint64(tail[12]) << 32
+ fallthrough
+ case 12:
+ k2 ^= uint64(tail[11]) << 24
+ fallthrough
+ case 11:
+ k2 ^= uint64(tail[10]) << 16
+ fallthrough
+ case 10:
+ k2 ^= uint64(tail[9]) << 8
+ fallthrough
+ case 9:
+ k2 ^= uint64(tail[8]) << 0
+
+ k2 *= c2_128
+ k2 = bits.RotateLeft64(k2, 33)
+ k2 *= c1_128
+ h2 ^= k2
+
+ fallthrough
+
+ case 8:
+ k1 ^= uint64(tail[7]) << 56
+ fallthrough
+ case 7:
+ k1 ^= uint64(tail[6]) << 48
+ fallthrough
+ case 6:
+ k1 ^= uint64(tail[5]) << 40
+ fallthrough
+ case 5:
+ k1 ^= uint64(tail[4]) << 32
+ fallthrough
+ case 4:
+ k1 ^= uint64(tail[3]) << 24
+ fallthrough
+ case 3:
+ k1 ^= uint64(tail[2]) << 16
+ fallthrough
+ case 2:
+ k1 ^= uint64(tail[1]) << 8
+ fallthrough
+ case 1:
+ k1 ^= uint64(tail[0]) << 0
+ k1 *= c1_128
+ k1 = bits.RotateLeft64(k1, 31)
+ k1 *= c2_128
+ h1 ^= k1
+ }
+
+ h1 ^= uint64(length)
+ h2 ^= uint64(length)
+
+ h1 += h2
+ h2 += h1
+
+ h1 = fmix64(h1)
+ h2 = fmix64(h2)
+
+ h1 += h2
+ h2 += h1
+
+ return h1, h2
+}
+
+func fmix64(k uint64) uint64 {
+ k ^= k >> 33
+ k *= 0xff51afd7ed558ccd
+ k ^= k >> 33
+ k *= 0xc4ceb9fe1a85ec53
+ k ^= k >> 33
+ return k
+}
+
+// sum256 will compute 4 64-bit hash values from the input.
+// It is designed to never allocate memory on the heap. So it
+// works without any byte buffer whatsoever.
+// It is designed to be strictly equivalent to
+//
+// a1 := []byte{1}
+// hasher := murmur3.New128()
+// hasher.Write(data) // #nosec
+// v1, v2 := hasher.Sum128()
+// hasher.Write(a1) // #nosec
+// v3, v4 := hasher.Sum128()
+//
+// See TestHashRandom.
+func (d *digest128) sum256(data []byte) (hash1, hash2, hash3, hash4 uint64) {
+ // We always start from zero.
+ d.h1, d.h2 = 0, 0
+ // Process as many bytes as possible.
+ d.bmix(data)
+ // We have enough to compute the first two 64-bit numbers
+ length := uint(len(data))
+ tail_length := length % block_size
+ tail := data[length-tail_length:]
+ hash1, hash2 = d.sum128(false, length, tail)
+ // Next we want to 'virtually' append 1 to the input, but,
+ // we do not want to append to an actual array!!!
+ if tail_length+1 == block_size {
+ // We are left with no tail!!!
+ word1 := binary.LittleEndian.Uint64(tail[:8])
+ word2 := uint64(binary.LittleEndian.Uint32(tail[8 : 8+4]))
+ word2 = word2 | (uint64(tail[12]) << 32) | (uint64(tail[13]) << 40) | (uint64(tail[14]) << 48)
+ // We append 1.
+ word2 = word2 | (uint64(1) << 56)
+ // We process the resulting 2 words.
+ d.bmix_words(word1, word2)
+ tail := data[length:] // empty slice, deliberate.
+ hash3, hash4 = d.sum128(false, length+1, tail)
+ } else {
+ // We still have a tail (fewer than 15 bytes) but we
+ // need to append '1' to it.
+ hash3, hash4 = d.sum128(true, length+1, tail)
+ }
+
+ return hash1, hash2, hash3, hash4
+}