# This is built on the discussion at
# http://effbot.org/zone/python-hash.htm
# See http://docs.python.org/reference/datamodel.html
# for special python method names like __getitem__

# Set to True to print every intermediate hash value.
DEBUG = False


def c_mul(a, b, bits=64):
    """Multiply two integers the way C does for a signed ``bits``-wide long.

    The exact product is reduced modulo ``2**bits`` and then reinterpreted
    as a two's-complement signed value, so results that overflow wrap
    around into the negative range instead of growing without bound.

    Args:
        a: First factor (converted with ``int``).
        b: Second factor.
        bits: Width of the emulated C ``long``; 64 by default, pass 32 to
            emulate a 32-bit long.

    Returns:
        The wrapped product, in ``[-2**(bits-1), 2**(bits-1) - 1]``.
    """
    modulus = 1 << bits
    product = (int(a) * b) & (modulus - 1)
    # A set top bit means the C result would be negative.
    if product >> (bits - 1):
        product -= modulus
    return product


class FakeString(object):
    """String wrapper whose ``__hash__`` re-implements the classic string hash.

    The hash is the multiply-by-1000003-and-xor scheme described in the
    effbot article referenced at the top of this file, computed with
    emulated 64-bit signed C arithmetic.
    """

    def __init__(self, string=""):
        self.string = string

    def __getitem__(self, i):
        """Return the character at index ``i`` (also enables iteration)."""
        return self.string[i]

    def __len__(self):
        """Return the length of the wrapped string."""
        return len(self.string)

    def __str__(self):
        """Return the wrapped string itself."""
        return self.string

    def __hash__(self):
        """Return the emulated C string hash of the wrapped string.

        An empty string hashes to 0. A result of -1 is replaced by -2,
        as -1 is not allowed as a final hash value in this scheme.
        """
        # 1000003 is smallest prime bigger than a million
        if not self:
            return 0  # empty
        value = ord(self[0]) << 7
        # The single parenthesised argument keeps print() output identical
        # whether print is a statement or a function.
        if DEBUG:
            print("debug: value = %i" % value)
        for char in self:
            # ^ is the bitwise xor operator
            value = c_mul(1000003, value) ^ ord(char)
            if DEBUG:
                print("debug: value = %i" % value)
        value = value ^ len(self)
        if DEBUG:
            print("debug: value = %i" % value)
        if value == -1:
            value = -2
        return value


if __name__ == "__main__":
    s = FakeString("foo")
    h = hash(s)
    print("hash('%s') = %i" % (s, h))