""" bits.py A class to manipulate a collection of bits. Only multiples of 8 bits allowed at this point. A few of the python bit manipulation routines are implemented, but only in a few simple cases. >>> a = Bits('011000010110001001100011') # create one >>> len(a) 24 >>> str(a) '011000010110001001100011' >>> a Bits('011000010110001001100011') >>> b = Bits('abc') # or create one this way >>> b # (any non-0,1 char sets ascii=True) Bits('011000010110001001100011') >>> b.ascii() # output as ascii chars 'abc' >>> b.number() # numbers work too 6382179 >>> Bits(6382179) Bits('011000010110001001100011') >>> a[0] # a subscript returns int 0 >>> a(0) # parens work too 0 >>> a[1:9] # subscript ranges Bits('11000010') >>> a[1:4] # but 110 padded LEFT 'til 8 Bits('00000110') >>> Bits('11110000') ^ Bits('10000001') # XOR Bits('01110001') >>> Bits('10000100') >> 2 # shift (length unchanged) Bits('00100001') >>> Bits('00000001') << 3 Bits('00001000') I've used the python array module as a storage mechanism, thinking it would scale well up to large-ish chunks of data. But so far I'm implementing things by jumping back and forth to strings of 0's and 1's, and only allow xor on things of the same length ... so I'm not sure it was worth the effort. Tested with python 3.5.6. Jim Mahoney | cs.marlboro.edu | April 2019 | MIT License """ import array import re from functools import reduce def list2str(items): """Collapse a list into a string. >>> list2str([1, 2, 3]) '123' """ return reduce(lambda x,y:x+str(y),items,'') def binstr2chars(binstr): """Convert a binary string of 0's and 1's to ascii chars. If the input length isn't a multiple of 8, then zeros will be appended at LEFT until until it is. >>> binstr2chars('011000010110001001100011') 'abc' """ if type(binstr) != str: return '' while len(binstr) % 8 != 0: binstr = '0' + binstr result = '' for i_char in range(len(binstr) // 8): num = 0 for i_bit in range(8): num += int(binstr[8*i_char + i_bit]) num <<= 1 result += chr(num >> 1) return result class Bits: """an array of bits >>> a = Bits('011000010110001001100011') # 0 padded to multiple of 8 >>> len(a) 24 >>> str(a) '011000010110001001100011' >>> a Bits('011000010110001001100011') >>> (a[0], a[1]) (0, 1) >>> (a(0), a(1)) # same (0, 1) >>> a[:3] # gets '011', appends left 0's to multiple of 8 Bits('00000011') """ def __init__(self, init="", ascii=False): if type(init)==int: numbers = [] while init: numbers.append(init % 256) init //= 256 numbers.reverse() self.bytes = array.array('B', numbers) elif type(init)==str: if ascii or re.compile("[^01]").search(init): characters = bytes(init, 'utf-8') self.bytes = array.array('B', characters) else: init = eval('0b' + init) numbers = [] # TODO : don't do this in two places. while init: numbers.append(init % 256) init //= 256 numbers.reverse() self.bytes = array.array('B', numbers) else: self.bytes = array.array('B') def __len__(self): return len(self.bytes)*8 def __call__(self, n): which_byte = int(n)//8 which_bit = 7 - ( int(n) % 8 ) return int( self.bytes[which_byte] & (1 << which_bit) > 0) def __getitem__(self, n): if type(n)==slice: (start, stop, step) = (n.start or 0, n.stop, n.step or 1) new = list2str([self(i) for i in range(start, stop, step)]) return Bits(new) else: return self(n) def __str__(self): return list2str([self[i] for i in range(len(self))]) def ascii(self): return self.bytes.tostring().decode('utf-8') def number(self): """Return bit string as a number. >>> Bits('10000001').number() 129 >>> Bits('abc', ascii=True).number() 6382179 """ total = 0 for b in self.bytes: total += b total *= 256 return total//256 def __lshift__(self, other): """binary shift left, keeping length the same. >>> Bits('00000001') << 3 Bits('00001000') """ binstr = str(self) return Bits(binstr[other:]+('0'*other)) def __rshift__(self, other): """binary shift right, keeping length the same. >>> Bits('10000100') >> 2 Bits('00100001') """ binstr = str(self) return Bits(('0'*other) + binstr[0:len(binstr)-other]) def __xor__(self,other): """ exclusive or >>> Bits('10110000') ^ Bits('10000001') Bits('00110001') """ return Bits(self.number() ^ other.number()) def __repr__(self): return "Bits('"+str(self)+"')" if __name__=="__main__": from doctest import testmod testmod()