From a000caf094d6a7157932af1ddeb5bd5f3355f139 Mon Sep 17 00:00:00 2001 From: Jeromy <why@ipfs.io> Date: Thu, 29 Sep 2016 13:19:07 -0700 Subject: [PATCH] add comment detailing the algorithm and fix License: MIT Signed-off-by: Jeromy <why@ipfs.io> --- set.go | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/set.go b/set.go index d93ccd1..e2ac757 100644 --- a/set.go +++ b/set.go @@ -139,6 +139,19 @@ func storeItems(ctx context.Context, dag merkledag.DAGService, estimatedLen uint } hashed := make(map[uint32][]item) for { + // This loop essentially enumerates every single item in the set + // and maps them all into a set of buckets. Each bucket will be recursively + // turned into its own sub-set, and so on down the chain. Each sub-set + // gets added to the dagservice, and put into its place in a set node's + // links array. + // + // Previously, the bucket was selected by taking an int32 from the hash of + // the input key + seed. This was erroneous as we would later be assigning + // the created sub-sets into an array of length 256 by the modulus of the + // int32 hash value with 256. This resulted in overwriting existing sub-sets + // and losing pins. The fix (a few lines down from this comment) is to + // map the hash value down to the 8-bit keyspace here while creating the + // buckets. This way, we avoid any overlapping later on. k, data, ok := iter() if !ok { break -- GitLab