From 1298bfcc2d99550172d60aa087977a8a06d3e50b Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Sun, 29 Nov 2015 08:22:37 +0100 Subject: [PATCH 01/23] Fixed minor doc typo. --- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index b98efea73..f33a16b2f 100644 --- a/README.rst +++ b/README.rst @@ -5,7 +5,7 @@ bitarray: efficient arrays of booleans This module provides an object type which efficiently represents an array of booleans. Bitarrays are sequence types and behave very much like usual lists. Eight bits are represented by one byte in a contiguous block of -memory. The user can select between two representations; little-endian +memory. The user can select between two representations: little-endian and big-endian. All of the functionality is implemented in C. Methods for accessing the machine representation are provided. This can be useful when bit level access to binary files is required, @@ -156,8 +156,8 @@ Bit endianness -------------- Since a bitarray allows addressing of individual bits, where the machine -represents 8 bits in one byte, there two obvious choices for this mapping; -little- and big-endian. +represents 8 bits in one byte, there are two obvious choices for this +mapping: little- and big-endian. 
When creating a new bitarray object, the endianness can always be specified explicitly: From 1241055ef0b6df373a71a7678d13cb4ab034e052 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Wed, 20 Jan 2016 09:38:11 -0600 Subject: [PATCH 02/23] add official support for Python 3.5 --- setup.py | 1 + 1 file changed, 1 insertion(+) diff --git a/setup.py b/setup.py index 5eae52743..20f98ef52 100644 --- a/setup.py +++ b/setup.py @@ -34,6 +34,7 @@ "Programming Language :: Python :: 3.2", "Programming Language :: Python :: 3.3", "Programming Language :: Python :: 3.4", + "Programming Language :: Python :: 3.5", "Topic :: Utilities", ], description = "efficient arrays of booleans -- C extension", From a1646c01c977894df68ab2e7e94d0de82d9b9a37 Mon Sep 17 00:00:00 2001 From: Ilan Schnell Date: Mon, 7 Mar 2016 00:39:25 -0600 Subject: [PATCH 03/23] improve test output --- bitarray/test_bitarray.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 83aa3487c..44de2f025 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -2144,9 +2144,9 @@ def test_write(self): # --------------------------------------------------------------------------- def run(verbosity=1, repeat=1): - print('bitarray is installed in: ' + os.path.dirname(__file__)) - print('bitarray version: ' + __version__) - print(sys.version) + print('bitarray is installed in: %s' % os.path.dirname(__file__)) + print('bitarray version: %s' % __version__) + print('Python version: %s' % sys.version) suite = unittest.TestSuite() for cls in tests: From d5ca62403c8fc0d5e1797c52014e0112f84b7f80 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Thu, 12 May 2016 14:30:36 +0200 Subject: [PATCH 04/23] add optional pos parameter to start search from This change is backward compatible. It adds an additional int (or rather ssize_t types) pos parameter to ba.search, and the search is started from that position on. 
That significantly improves runtime for continuous searches in large bitarrays. In my specific use case of searching 1k distinct bit patterns 1M times in a 16MBit array, the time reduced from 80 seconds to 0.5 seconds. --- bitarray/_bitarray.c | 25 +++++++++++++++---------- bitarray/test_bitarray.py | 11 +++++++++++ 2 files changed, 26 insertions(+), 10 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index d2c19cb9f..f3a74822e 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -1040,10 +1040,10 @@ bitarray_search(bitarrayobject *self, PyObject *args) PyObject *list = NULL; /* list of matching positions to be returned */ PyObject *x, *item = NULL; Py_ssize_t limit = -1; + Py_ssize_t pos = 0; bitarrayobject *xa; - idx_t p; - if (!PyArg_ParseTuple(args, "O|" PY_SSIZE_T_FMT ":_search", &x, &limit)) + if (!PyArg_ParseTuple(args, "O|" PY_SSIZE_T_FMT PY_SSIZE_T_FMT ":_search", &x, &limit, &pos)) return NULL; if (!bitarray_Check(x)) { @@ -1061,13 +1061,15 @@ bitarray_search(bitarrayobject *self, PyObject *args) if (xa->nbits > self->nbits || limit == 0) return list; - p = 0; + // FIXME: make sure pos is in range + + // pos = 0; while (1) { - p = search(self, xa, p); - if (p < 0) + pos = search(self, xa, pos); + if (pos < 0) break; - item = PyLong_FromLongLong(p); - p++; + item = PyLong_FromLongLong(pos); + pos++; if (item == NULL || PyList_Append(list, item) < 0) { Py_XDECREF(item); Py_XDECREF(list); @@ -1081,12 +1083,15 @@ bitarray_search(bitarrayobject *self, PyObject *args) } PyDoc_STRVAR(search_doc, -"search(bitarray, [limit]) -> list\n\ +"search(bitarray, [limit], [pos]) -> list\n\ \n\ Searches for the given a bitarray in self, and returns the start positions\n\ where bitarray matches self as a list.\n\ -The optional argument limits the number of search results to the integer\n\ -specified. By default, all search results are returned."); +The optional 'limit' argument limits the number of search results to the \n\ +integer specified. 
By default, all search results are returned.\n\ +The optional 'pos' argument begins the search at the position specified.\n\ +By default, search begins at position 0.\n\ +"); static PyObject * diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 44de2f025..33a3647d0 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1388,6 +1388,17 @@ def test_search3(self): self.assertEqual(list(a.itersearch(b)), res) self.assertEqual([p for p in a.itersearch(b)], res) + def test_search4(self): + ba = bitarray('0011001100110011') + pos = 0 + res = list() + while True: + match = ba.search(bitarray('11'), 1, pos) + if not match: + break + res.append(match[0]) + pos = match[0] + 1 + self.assertEqual([2, 6, 10, 14], res) def test_fill(self): a = bitarray('') From e0b08dd84119d51751f7ccdf096f940f0af4a34e Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Thu, 12 May 2016 15:41:01 +0200 Subject: [PATCH 05/23] add tests for pos dependent search --- bitarray/_bitarray.c | 9 ++++----- bitarray/test_bitarray.py | 3 ++- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index f3a74822e..843d0c6af 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -1058,12 +1058,11 @@ bitarray_search(bitarrayobject *self, PyObject *args) list = PyList_New(0); if (list == NULL) return NULL; - if (xa->nbits > self->nbits || limit == 0) + if (xa->nbits > self->nbits || limit == 0 || pos > self->nbits) return list; - - // FIXME: make sure pos is in range - - // pos = 0; + if (pos < 0) + pos = 0; + while (1) { pos = search(self, xa, pos); if (pos < 0) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index 33a3647d0..c7bba4d41 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1390,7 +1390,7 @@ def test_search3(self): def test_search4(self): ba = bitarray('0011001100110011') - pos = 0 + pos = -1 res = list() while True: match = ba.search(bitarray('11'), 1, pos) @@ 
-1399,6 +1399,7 @@ def test_search4(self): res.append(match[0]) pos = match[0] + 1 self.assertEqual([2, 6, 10, 14], res) + self.assertEqual([], ba.search(bitarray('11'), 1, len(ba)+1)) def test_fill(self): a = bitarray('') From 9637820df55df3ee62b77a4150a60ddaa4590c43 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Thu, 12 May 2016 15:41:25 +0200 Subject: [PATCH 06/23] add tests for pos dependent search --- bitarray/test_bitarray.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index c7bba4d41..e77ac38e1 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1396,7 +1396,7 @@ def test_search4(self): match = ba.search(bitarray('11'), 1, pos) if not match: break - res.append(match[0]) + res.extent(match) pos = match[0] + 1 self.assertEqual([2, 6, 10, 14], res) self.assertEqual([], ba.search(bitarray('11'), 1, len(ba)+1)) From d6ca2d78a94988ebbb0939ba9da65025947e3c3c Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Thu, 12 May 2016 15:45:25 +0200 Subject: [PATCH 07/23] extent tests for pos dependent search --- bitarray/test_bitarray.py | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index e77ac38e1..b5d81e36b 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1390,15 +1390,16 @@ def test_search3(self): def test_search4(self): ba = bitarray('0011001100110011') - pos = -1 - res = list() - while True: - match = ba.search(bitarray('11'), 1, pos) - if not match: - break - res.extent(match) - pos = match[0] + 1 - self.assertEqual([2, 6, 10, 14], res) + for inc in [1, 2, 3, 4, 5, 6]: + pos = -1 + res = list() + while True: + match = ba.search(bitarray('11'), inc, pos) + if not match: + break + res += match + pos = match[-1] + 1 + self.assertEqual([2, 6, 10, 14], res), inc self.assertEqual([], ba.search(bitarray('11'), 1, len(ba)+1)) def test_fill(self): From 
57d4192f35c32bf84af20d263386d90e0a103c71 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Tue, 17 May 2016 14:29:55 +0200 Subject: [PATCH 08/23] expose setrange --- bitarray/_bitarray.c | 38 ++++++++++++++++++++++++++++++++++++-- bitarray/test_bitarray.py | 6 ++++++ setup.py | 0 update_readme.py | 0 4 files changed, 42 insertions(+), 2 deletions(-) mode change 100644 => 100755 setup.py mode change 100644 => 100755 update_readme.py diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 843d0c6af..b904fdd8c 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -372,15 +372,20 @@ bitwise(bitarrayobject *self, PyObject *arg, enum op_type oper) } /* set the bits from start to stop (excluding) in self to val */ -static void +idx_t setrange(bitarrayobject *self, idx_t start, idx_t stop, int val) { idx_t i; + idx_t ret = 0; assert(0 <= start && start <= self->nbits); - assert(0 <= stop && stop <= self->nbits); + assert(0 <= stop && stop <= self->nbits); for (i = start; i < stop; i++) + { + ret++; setbit(self, i, val); + } + return ret; } static void @@ -1034,6 +1039,33 @@ Return True if bitarray contains x, False otherwise.\n\ The value x may be a boolean (or integer between 0 and 1), or a bitarray."); +static PyObject * +bitarray_setrange(bitarrayobject *self, PyObject *args) +{ + Py_ssize_t start = -1; /* start of range to set */ + Py_ssize_t stop = -1; /* end of range to set */ + PyObject *v = NULL; /* value to set (evals to true/false) */ + int vi = 0; /* int val to set */ + idx_t ret = 0; /* return value: number of bits set */ + + if (!PyArg_ParseTuple(args, PY_SSIZE_T_FMT PY_SSIZE_T_FMT "O:_setrange", &start, &stop, &v)) + return NULL; + + vi = PyObject_IsTrue(v); + + /* setrange checks for idx *<* stop, so we inc by one */ + ret = setrange(self, start, stop+1, vi); + return PyLong_FromLongLong(ret); +} + +PyDoc_STRVAR(setrange_doc, +"setrange(bitarray, start, stop, val) -> int\n\ +\n\ +Sets a range in the bitarray to the given value, and returns 
the number\n\ +of bits set.\n\ +"); + + static PyObject * bitarray_search(bitarrayobject *self, PyObject *args) { @@ -2477,6 +2509,8 @@ bitarray_methods[] = { reverse_doc}, {"setall", (PyCFunction) bitarray_setall, METH_O, setall_doc}, + {"setrange", (PyCFunction) bitarray_setrange, METH_VARARGS, + setrange_doc}, {"search", (PyCFunction) bitarray_search, METH_VARARGS, search_doc}, {"itersearch", (PyCFunction) bitarray_itersearch, METH_O, diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index b5d81e36b..c7cfce646 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -1356,6 +1356,12 @@ def test_search(self): self.assertRaises(ValueError, a.search, bitarray()) self.assertRaises(TypeError, a.search, '010') + def test_setrange(self): + a = bitarray('11111') + n = a.setrange(1, 3, False) + self.assertEqual(n, 3) + self.assertEqual(a, bitarray('10001')) + def test_itersearch(self): a = bitarray('10011') self.assertRaises(ValueError, a.itersearch, bitarray()) diff --git a/setup.py b/setup.py old mode 100644 new mode 100755 diff --git a/update_readme.py b/update_readme.py old mode 100644 new mode 100755 From cc0cebd76b42d6287dc1cea148de8c8a36e37b30 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Sat, 4 Jun 2016 01:41:15 +0200 Subject: [PATCH 09/23] add bulk op for setting scattered bits --- bitarray/_bitarray.c | 50 +++++++++++++++++++++++++++++++++++++++ bitarray/test_bitarray.py | 15 ++++++++++++ 2 files changed, 65 insertions(+) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index b904fdd8c..0ff6169d9 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -1039,6 +1039,54 @@ Return True if bitarray contains x, False otherwise.\n\ The value x may be a boolean (or integer between 0 and 1), or a bitarray."); +static PyObject * +bitarray_setlist(bitarrayobject *self, PyObject *args) +{ + PyObject *p = NULL; /* positions to set (evals to true/false) */ + PyObject *tmp = NULL; /* positions to set (evals to true/false) 
*/ + PyObject *v = NULL; /* value to set (evals to true/false) */ + int vi = 0; /* int val to set */ + idx_t i = 0; /* iteration index */ + idx_t npos = 0; /* loop size */ + idx_t pos = 0; /* loop var */ + + if (!PyArg_ParseTuple(args, "OO:_setlist", &p, &v)) + return NULL; + + if (!PyList_Check(p)) { + PyErr_SetString(PyExc_TypeError, "position list expected"); + return NULL; + } + + vi = PyObject_IsTrue(v); + + npos = PyList_Size(p); + if (npos < 0) + return NULL; /* Not a list */ + + for (i=0; i int\n\ +\n\ +Sets the bitarray to the given value for each position given in the list,\n\ +and returns the number of bits set.\n\ +"); + + static PyObject * bitarray_setrange(bitarrayobject *self, PyObject *args) { @@ -2511,6 +2559,8 @@ bitarray_methods[] = { setall_doc}, {"setrange", (PyCFunction) bitarray_setrange, METH_VARARGS, setrange_doc}, + {"setlist", (PyCFunction) bitarray_setlist, METH_VARARGS, + setlist_doc}, {"search", (PyCFunction) bitarray_search, METH_VARARGS, search_doc}, {"itersearch", (PyCFunction) bitarray_itersearch, METH_O, diff --git a/bitarray/test_bitarray.py b/bitarray/test_bitarray.py index c7cfce646..780eb350e 100644 --- a/bitarray/test_bitarray.py +++ b/bitarray/test_bitarray.py @@ -560,6 +560,21 @@ def test_setslice_to_int(self): slice(None, 2, None), -1) + def test_setset_to_bool(self): + a = bitarray('11111111') + a.setset([0, 2, 4, 6], False) + self.assertEqual(a, bitarray('01010101')) + a.setset([0, 4], True) + self.assertEqual(a, bitarray('11011101')) + + def test_setset_to_int(self): + a = bitarray('11111111') + a.setset([0, 2, 4, 6], 0) + self.assertEqual(a, bitarray('01010101')) + a.setset([0, 4], 1) + self.assertEqual(a, bitarray('11011101')) + + def test_delitem1(self): a = bitarray('100110') del a[1] From 4f19da16c41cdbeb3cbc9fa8984310427bafc808 Mon Sep 17 00:00:00 2001 From: Philippe Ombredanne Date: Sun, 29 Nov 2015 08:22:37 +0100 Subject: [PATCH 10/23] Fixed minor doc typo. 
--- README.rst | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/README.rst b/README.rst index b98efea73..f33a16b2f 100644 --- a/README.rst +++ b/README.rst @@ -5,7 +5,7 @@ bitarray: efficient arrays of booleans This module provides an object type which efficiently represents an array of booleans. Bitarrays are sequence types and behave very much like usual lists. Eight bits are represented by one byte in a contiguous block of -memory. The user can select between two representations; little-endian +memory. The user can select between two representations: little-endian and big-endian. All of the functionality is implemented in C. Methods for accessing the machine representation are provided. This can be useful when bit level access to binary files is required, @@ -156,8 +156,8 @@ Bit endianness -------------- Since a bitarray allows addressing of individual bits, where the machine -represents 8 bits in one byte, there two obvious choices for this mapping; -little- and big-endian. +represents 8 bits in one byte, there are two obvious choices for this +mapping: little- and big-endian. When creating a new bitarray object, the endianness can always be specified explicitly: From 3aa5e346301ac5d76d64a0df01eff543b58d3230 Mon Sep 17 00:00:00 2001 From: Jessy Diamond Exum Date: Sat, 22 Oct 2016 14:02:15 -0700 Subject: [PATCH 11/23] Initial refactor for fork. --- .travis.yml | 7 +++ setup.cfg | 5 ++ setup.py | 41 ++++++++++--- {bitarray => test}/test_bitarray.py | 94 +++++++++-------------------- 4 files changed, 74 insertions(+), 73 deletions(-) create mode 100644 .travis.yml create mode 100644 setup.cfg rename {bitarray => test}/test_bitarray.py (97%) diff --git a/.travis.yml b/.travis.yml new file mode 100644 index 000000000..96ba6eb5a --- /dev/null +++ b/.travis.yml @@ -0,0 +1,7 @@ +dist: xenial +language: python +python: +- "2.7" +- "3.5" +install: pip install -e . 
+script: py.test diff --git a/setup.cfg b/setup.cfg new file mode 100644 index 000000000..47176bfb0 --- /dev/null +++ b/setup.cfg @@ -0,0 +1,5 @@ +[bdist_wheel] +universal=1 + +[tool:pytest] +testpaths = test \ No newline at end of file diff --git a/setup.py b/setup.py index 20f98ef52..0fd22a09e 100644 --- a/setup.py +++ b/setup.py @@ -1,22 +1,44 @@ +#-*- coding: utf-8 -*- + +""" + ProteusISC + ~~~~~ + + Setup + ````` + + $ pip install . # or python setup.py install +""" + +import codecs +import os import re -from os.path import join from distutils.core import setup, Extension +here = os.path.abspath(os.path.dirname(__file__)) -kwds = {} -kwds['long_description'] = open('README.rst').read() +def read(*parts): + """Taken from pypa pip setup.py: + intentionally *not* adding an encoding option to open, See: + https://github.com/pypa/virtualenv/issues/201#issuecomment-3145690 + """ + return codecs.open(os.path.join(here, *parts), 'r').read() -# Read version from bitarray/__init__.py -pat = re.compile(r'__version__\s*=\s*(\S+)', re.M) -data = open(join('bitarray', '__init__.py')).read() -kwds['version'] = eval(pat.search(data).group(1)) +def find_version(*file_paths): + version_file = read(*file_paths) + version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", + version_file, re.M) + if version_match: + return version_match.group(1) + raise RuntimeError("Unable to find version string.") setup( name = "bitarray", + version=find_version("bitarray", "__init__.py"), author = "Ilan Schnell", author_email = "ilanschnell@gmail.com", - url = "https://github.com/ilanschnell/bitarray", + url = "https://github.com/diamondman/bitarray", license = "PSF", classifiers = [ "License :: OSI Approved :: Python Software Foundation License", @@ -38,8 +60,9 @@ "Topic :: Utilities", ], description = "efficient arrays of booleans -- C extension", + 
long_description=open(os.path.join(os.path.dirname(__file__), + 'README.rst')).read(), packages = ["bitarray"], ext_modules = [Extension(name = "bitarray._bitarray", sources = ["bitarray/_bitarray.c"])], - **kwds ) diff --git a/bitarray/test_bitarray.py b/test/test_bitarray.py similarity index 97% rename from bitarray/test_bitarray.py rename to test/test_bitarray.py index 44de2f025..6e874ed81 100644 --- a/bitarray/test_bitarray.py +++ b/test/test_bitarray.py @@ -21,8 +21,6 @@ from bitarray import bitarray, bitdiff, bits2bytes, __version__ -tests = [] - if sys.version_info[:2] < (2, 6): def next(x): return x.next() @@ -169,7 +167,6 @@ def test_bits2bytes(self): self.assertEqual(bits2bytes(n), m) -tests.append(TestsModuleFunctions) # --------------------------------------------------------------------------- @@ -324,8 +321,6 @@ def test_WrongArgs(self): self.assertRaises(ValueError, bitarray.__new__, bitarray, 0, 'foo') -tests.append(CreateObjectTests) - # --------------------------------------------------------------------------- class ToObjectsTests(unittest.TestCase, Util): @@ -353,8 +348,6 @@ def test_tuple(self): self.assertEqual(tuple(a), tuple(a.tolist())) -tests.append(ToObjectsTests) - # --------------------------------------------------------------------------- class MetaDataTests(unittest.TestCase): @@ -408,8 +401,6 @@ def test_length(self): self.assertEqual(a.length(), n) -tests.append(MetaDataTests) - # --------------------------------------------------------------------------- class SliceTests(unittest.TestCase, Util): @@ -589,8 +580,6 @@ def test_delitem2(self): self.assertEQUAL(c, bitarray(cc, endian=c.endian())) -tests.append(SliceTests) - # --------------------------------------------------------------------------- class MiscTests(unittest.TestCase, Util): @@ -750,8 +739,6 @@ def test_overflow(self): self.assertRaises(OverflowError, a.__imul__, 17180) -tests.append(MiscTests) - # 
--------------------------------------------------------------------------- class SpecialMethodTests(unittest.TestCase, Util): @@ -846,8 +833,6 @@ def test_not_equality(self): self.assertReallyNotEqual(bitarray(''), bitarray('0')) self.assertReallyNotEqual(bitarray('0'), bitarray('1')) -tests.append(SpecialMethodTests) - # --------------------------------------------------------------------------- class NumberTests(unittest.TestCase, Util): @@ -941,8 +926,6 @@ def test_imul(self): self.assertRaises(TypeError, a.__imul__, None) -tests.append(NumberTests) - # --------------------------------------------------------------------------- class BitwiseTests(unittest.TestCase, Util): @@ -1028,8 +1011,6 @@ def test_invert(self): self.check_obj(b) -tests.append(BitwiseTests) - # --------------------------------------------------------------------------- class SequenceTests(unittest.TestCase, Util): @@ -1089,8 +1070,6 @@ def test_contains4(self): self.assertEqual(bitarray(s) in a, r) -tests.append(SequenceTests) - # --------------------------------------------------------------------------- class ExtendTests(unittest.TestCase, Util): @@ -1212,8 +1191,6 @@ def test_string01(self): self.check_obj(c) -tests.append(ExtendTests) - # --------------------------------------------------------------------------- class MethodTests(unittest.TestCase, Util): @@ -1578,8 +1555,6 @@ def test_bytereverse(self): self.check_obj(b) -tests.append(MethodTests) - # --------------------------------------------------------------------------- class StringTests(unittest.TestCase, Util): @@ -1691,8 +1666,6 @@ def test_pack(self): self.assertRaises(TypeError, a.pack, bitarray()) -tests.append(StringTests) - # --------------------------------------------------------------------------- class FileTests(unittest.TestCase, Util): @@ -1899,8 +1872,6 @@ def test_tofile(self): self.assertEqual(c, b) -tests.append(FileTests) - # --------------------------------------------------------------------------- class 
PrefixCodeTests(unittest.TestCase, Util): @@ -2103,43 +2074,38 @@ def test_real_example(self): self.assertEqual(''.join(a.iterdecode(code)), message) -tests.append(PrefixCodeTests) - # -------------- Buffer Interface (Python 2.7 only for now) ---------------- - -class BufferInterfaceTests(unittest.TestCase): - - def test_read1(self): - a = bitarray('01000001' '01000010' '01000011', endian='big') - v = memoryview(a) - self.assertEqual(len(v), 3) - self.assertEqual(v[0], 'A') - self.assertEqual(v[:].tobytes(), 'ABC') - a[13] = 1 - self.assertEqual(v[:].tobytes(), 'AFC') - - def test_read2(self): - a = bitarray([randint(0, 1) for d in range(8000)]) - v = memoryview(a) - self.assertEqual(len(v), 1000) - b = a[345 * 8 : 657 * 8] - self.assertEqual(v[345:657].tobytes(), b.tobytes()) - self.assertEqual(v[:].tobytes(), a.tobytes()) - - def test_write(self): - a = bitarray(800000) - a.setall(0) - v = memoryview(a) - self.assertFalse(v.readonly) - v[50000] = '\xff' - self.assertEqual(a[399999:400009], bitarray('0111111110')) - a[400003] = 0 - self.assertEqual(a[399999:400009], bitarray('0111011110')) - v[30001:30004] = 'ABC' - self.assertEqual(a[240000:240040].tobytes(), '\x00ABC\x00') - if sys.version_info[:2] == (2, 7): - tests.append(BufferInterfaceTests) + class BufferInterfaceTests(unittest.TestCase): + + def test_read1(self): + a = bitarray('01000001' '01000010' '01000011', endian='big') + v = memoryview(a) + self.assertEqual(len(v), 3) + self.assertEqual(v[0], 'A') + self.assertEqual(v[:].tobytes(), 'ABC') + a[13] = 1 + self.assertEqual(v[:].tobytes(), 'AFC') + + def test_read2(self): + a = bitarray([randint(0, 1) for d in range(8000)]) + v = memoryview(a) + self.assertEqual(len(v), 1000) + b = a[345 * 8 : 657 * 8] + self.assertEqual(v[345:657].tobytes(), b.tobytes()) + self.assertEqual(v[:].tobytes(), a.tobytes()) + + def test_write(self): + a = bitarray(800000) + a.setall(0) + v = memoryview(a) + self.assertFalse(v.readonly) + v[50000] = '\xff' + 
self.assertEqual(a[399999:400009], bitarray('0111111110')) + a[400003] = 0 + self.assertEqual(a[399999:400009], bitarray('0111011110')) + v[30001:30004] = 'ABC' + self.assertEqual(a[240000:240040].tobytes(), '\x00ABC\x00') # --------------------------------------------------------------------------- From d4404550edab9e232d41ae1735fd47b2a5dc1ac4 Mon Sep 17 00:00:00 2001 From: John-John Tedro Date: Wed, 6 Aug 2014 16:00:41 +0200 Subject: [PATCH 12/23] Use SIMD instructions for bitwise operations (gcc specific). * Avoid case by generating BITWISE_FUNC and BITARRAY_FUNC. * Change type of zero and one to (unsigned char) due to compiler complaining of overflows on '-pedantic'. This resulted in a ~10x speedup on large bitarrays for me using the following test. ```python import bitarray import timeit a = bitarray.bitarray(50000) b = bitarray.bitarray(50000) def test_and(): global a a &= b def test_or(): global a a |= b def test_xor(): global a a ^= b print timeit.timeit("test_and()", "from __main__ import test_and") print timeit.timeit("test_or()", "from __main__ import test_or") print timeit.timeit("test_xor()", "from __main__ import test_xor") ``` ``` upstream master: 20.3912520409 20.6214001179 20.5252711773 with this patch: 2.11912703514 2.14890694618 2.1437420845 ``` About memcpy usage in simd_v16uc_op: I found that `memcpy` does a good job when inspecting compiler output. On my system it uses `movdqa` to copy memory to and from xmm registers. --- bitarray/_bitarray.c | 140 +++++++++++++++++++++++++++---------------- 1 file changed, 90 insertions(+), 50 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index d2c19cb9f..21abb0892 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -87,13 +87,32 @@ static PyTypeObject Bitarraytype; #define BYTES(bits) (((bits) == 0) ? 0 : (((bits) - 1) / 8 + 1)) -#define BITMASK(endian, i) (((char) 1) << ((endian) ? (7 - (i)%8) : (i)%8)) +#define BITMASK(endian, i) (((unsigned char) 1) << ((endian) ? 
(7 - (i)%8) : (i)%8)) /* ------------ low level access to bits in bitarrayobject ------------- */ #define GETBIT(self, i) \ ((self)->ob_item[(i) / 8] & BITMASK((self)->endian, i) ? 1 : 0) +#define V16C_SIZE 16 + +#define IS_GCC defined(__GNUC__) + +#if IS_GCC +typedef char v16c __attribute__ ((vector_size (V16C_SIZE))); + +/* + * Perform bitwise operation OP on 16 bytes of memory at a time. + */ +#define simd_v16uc_op(A, B, OP) do { \ + v16c __a, __b; \ + memcpy(&__a, A, V16C_SIZE); \ + memcpy(&__b, B, V16C_SIZE); \ + v16c __r = __a OP __b; \ + memcpy(A, &__r, V16C_SIZE); \ +} while(0); +#endif + static void setbit(bitarrayobject *self, idx_t i, int bit) { @@ -334,43 +353,6 @@ enum op_type { OP_xor, }; -/* perform bitwise operation */ -static int -bitwise(bitarrayobject *self, PyObject *arg, enum op_type oper) -{ - bitarrayobject *other; - Py_ssize_t i; - - if (!bitarray_Check(arg)) { - PyErr_SetString(PyExc_TypeError, - "bitarray object expected for bitwise operation"); - return -1; - } - other = (bitarrayobject *) arg; - if (self->nbits != other->nbits) { - PyErr_SetString(PyExc_ValueError, - "bitarrays of equal length expected for bitwise operation"); - return -1; - } - setunused(self); - setunused(other); - switch (oper) { - case OP_and: - for (i = 0; i < Py_SIZE(self); i++) - self->ob_item[i] &= other->ob_item[i]; - break; - case OP_or: - for (i = 0; i < Py_SIZE(self); i++) - self->ob_item[i] |= other->ob_item[i]; - break; - case OP_xor: - for (i = 0; i < Py_SIZE(self); i++) - self->ob_item[i] ^= other->ob_item[i]; - break; - } - return 0; -} - /* set the bits from start to stop (excluding) in self to val */ static void setrange(bitarrayobject *self, idx_t start, idx_t stop, int val) @@ -543,7 +525,7 @@ append_item(bitarrayobject *self, PyObject *item) } static PyObject * -unpack(bitarrayobject *self, char zero, char one) +unpack(bitarrayobject *self, unsigned char zero, unsigned char one) { PyObject *res; Py_ssize_t i; @@ -1658,7 +1640,7 @@ use the extend 
method."); static PyObject * bitarray_unpack(bitarrayobject *self, PyObject *args, PyObject *kwds) { - char zero = 0x00, one = 0xff; + unsigned char zero = 0x00, one = 0xff; static char* kwlist[] = {"zero", "one", NULL}; if (!PyArg_ParseTupleAndKeywords(args, kwds, "|cc:unpack", kwlist, @@ -2068,38 +2050,96 @@ bitarray_cpinvert(bitarrayobject *self) return res; } -#define BITWISE_FUNC(oper) \ +#if IS_GCC +#define BITWISE_FUNC_INTERNAL(SELF, OTHER, OP, OPEQ) do { \ + Py_ssize_t i = 0; \ + \ + for (; i + V16C_SIZE < Py_SIZE(SELF); i += V16C_SIZE) { \ + simd_v16uc_op((SELF)->ob_item + i, (OTHER)->ob_item + i, OP); \ + } \ + \ + for (; i < Py_SIZE(SELF); ++i) { \ + (SELF)->ob_item[i] OPEQ (OTHER)->ob_item[i]; \ + } \ +} while(0); +#else +#define BITWISE_FUNC_INTERNAL(SELF, OTHER, OP, OPEQ) do { \ + Py_ssize_t i; \ + for (i = 0; i < Py_SIZE(SELF); ++i) { \ + (SELF)->ob_item[i] OPEQ (OTHER)->ob_item[i]; \ + } \ +} while(0); +#endif + +/* + * Generate function that performs bitwise operations. 
+ **/ +#define BITWISE_FUNC(OPNAME, OP, OPEQ) \ +static int bitwise_ ## OPNAME (bitarrayobject *self, PyObject *arg) \ +{ \ + bitarrayobject *other; \ + \ + if (!bitarray_Check(arg)) { \ + PyErr_SetString(PyExc_TypeError, \ + "bitarray object expected for bitwise operation"); \ + return -1; \ + } \ + \ + other = (bitarrayobject *) arg; \ + \ + if (self->nbits != other->nbits) { \ + PyErr_SetString(PyExc_ValueError, \ + "bitarrays of equal length expected for bitwise operation"); \ + return -1; \ + } \ + \ + setunused(self); \ + setunused(other); \ + \ + BITWISE_FUNC_INTERNAL(self, other, OP, OPEQ); \ + \ + return 0; \ +} + +BITWISE_FUNC(xor, ^, ^=) +BITWISE_FUNC(and, &, &=) +BITWISE_FUNC(or, |, |=) + +#define BITARRAY_FUNC(oper) \ static PyObject * \ bitarray_ ## oper (bitarrayobject *self, PyObject *other) \ { \ PyObject *res; \ \ res = bitarray_copy(self); \ - if (bitwise((bitarrayobject *) res, other, OP_ ## oper) < 0) { \ + \ + if (bitwise_ ## oper((bitarrayobject *) res, other) < 0) { \ Py_DECREF(res); \ return NULL; \ } \ + \ return res; \ } -BITWISE_FUNC(and) -BITWISE_FUNC(or) -BITWISE_FUNC(xor) +BITARRAY_FUNC(and) +BITARRAY_FUNC(or) +BITARRAY_FUNC(xor) -#define BITWISE_IFUNC(oper) \ +#define BITARRAY_IFUNC(oper) \ static PyObject * \ bitarray_i ## oper (bitarrayobject *self, PyObject *other) \ { \ - if (bitwise(self, other, OP_ ## oper) < 0) \ + if (bitwise_ ## oper(self, other) < 0) \ return NULL; \ + \ Py_INCREF(self); \ return (PyObject *) self; \ } -BITWISE_IFUNC(and) -BITWISE_IFUNC(or) -BITWISE_IFUNC(xor) +BITARRAY_IFUNC(and) +BITARRAY_IFUNC(or) +BITARRAY_IFUNC(xor) /******************* variable length encoding and decoding ***************/ From c2dceced1de4b21db16e31a6988a2b6577fc78be Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Sun, 23 Oct 2016 00:03:33 +0200 Subject: [PATCH 13/23] catch up with a name change --- test/test_bitarray.py | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/test/test_bitarray.py 
b/test/test_bitarray.py index 6cae54d94..b6869493e 100644 --- a/test/test_bitarray.py +++ b/test/test_bitarray.py @@ -551,18 +551,18 @@ def test_setslice_to_int(self): slice(None, 2, None), -1) - def test_setset_to_bool(self): + def test_setlist_to_bool(self): a = bitarray('11111111') - a.setset([0, 2, 4, 6], False) + a.setlist([0, 2, 4, 6], False) self.assertEqual(a, bitarray('01010101')) - a.setset([0, 4], True) + a.setlist([0, 4], True) self.assertEqual(a, bitarray('11011101')) - def test_setset_to_int(self): + def test_setlist_to_int(self): a = bitarray('11111111') - a.setset([0, 2, 4, 6], 0) + a.setlist([0, 2, 4, 6], 0) self.assertEqual(a, bitarray('01010101')) - a.setset([0, 4], 1) + a.setlist([0, 4], 1) self.assertEqual(a, bitarray('11011101')) From d9f9cbaf4a17dc1bdb6b5a47aa80e932a96eab25 Mon Sep 17 00:00:00 2001 From: Jessy Diamond Exum Date: Sat, 22 Oct 2016 14:49:04 -0700 Subject: [PATCH 14/23] Removed unnecessary old test code. --- test/test_bitarray.py | 19 ------------------- 1 file changed, 19 deletions(-) diff --git a/test/test_bitarray.py b/test/test_bitarray.py index 6e874ed81..e6d12d7d1 100644 --- a/test/test_bitarray.py +++ b/test/test_bitarray.py @@ -2106,22 +2106,3 @@ def test_write(self): self.assertEqual(a[399999:400009], bitarray('0111011110')) v[30001:30004] = 'ABC' self.assertEqual(a[240000:240040].tobytes(), '\x00ABC\x00') - -# --------------------------------------------------------------------------- - -def run(verbosity=1, repeat=1): - print('bitarray is installed in: %s' % os.path.dirname(__file__)) - print('bitarray version: %s' % __version__) - print('Python version: %s' % sys.version) - - suite = unittest.TestSuite() - for cls in tests: - for _ in range(repeat): - suite.addTest(unittest.makeSuite(cls)) - - runner = unittest.TextTestRunner(verbosity=verbosity) - return runner.run(suite) - - -if __name__ == '__main__': - run() From deab08d6a6cd8c4dbed932ffa5bc9da8b7ffd63a Mon Sep 17 00:00:00 2001 From: Jessy Diamond Exum Date: Sat, 
22 Oct 2016 15:34:39 -0700 Subject: [PATCH 15/23] Added travisci badge to README --- README.rst | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) diff --git a/README.rst b/README.rst index f33a16b2f..73afe7c6a 100644 --- a/README.rst +++ b/README.rst @@ -2,6 +2,9 @@ bitarray: efficient arrays of booleans ====================================== +.. image:: https://travis-ci.org/diamondman/bitarray.svg?branch=master + :target: https://travis-ci.org/diamondman/bitarray + This module provides an object type which efficiently represents an array of booleans. Bitarrays are sequence types and behave very much like usual lists. Eight bits are represented by one byte in a contiguous block of @@ -68,7 +71,7 @@ Once you have installed the package, you may want to test it:: ........................................... ---------------------------------------------------------------------- Ran 134 tests in 1.396s - + OK You can always import the function test, @@ -305,26 +308,26 @@ Reference the optional initial, and endianness. If no object is provided, the bitarray is initialized to have length zero. The initial object may be of the following types: - + int, long Create bitarray of length given by the integer. The initial values in the array are random, because only the memory allocated. - + string Create bitarray from a string of '0's and '1's. - + list, tuple, iterable Create bitarray from a sequence, each element in the sequence is converted to a bit using truth value value. - + bitarray Create bitarray from another bitarray. This is done by copying the memory holding the bitarray data, and is hence very fast. - + The optional keyword arguments 'endian' specifies the bit endianness of the created bitarray object. Allowed values are 'big' and 'little' (default is 'big'). 
- + Note that setting the bit endianness only has an effect when accessing the machine representation of the bitarray, i.e. when using the methods: tofile, fromfile, tobytes, frombytes. From 82c6f4152fd7bfaf032b4d444e64469dbd665488 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Sun, 23 Oct 2016 00:55:30 +0200 Subject: [PATCH 16/23] remove setrange from API again --- bitarray/_bitarray.c | 29 ----------------------------- 1 file changed, 29 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 0ff6169d9..1c80540f9 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -1087,33 +1087,6 @@ and returns the number of bits set.\n\ "); -static PyObject * -bitarray_setrange(bitarrayobject *self, PyObject *args) -{ - Py_ssize_t start = -1; /* start of range to set */ - Py_ssize_t stop = -1; /* end of range to set */ - PyObject *v = NULL; /* value to set (evals to true/false) */ - int vi = 0; /* int val to set */ - idx_t ret = 0; /* return value: number of bits set */ - - if (!PyArg_ParseTuple(args, PY_SSIZE_T_FMT PY_SSIZE_T_FMT "O:_setrange", &start, &stop, &v)) - return NULL; - - vi = PyObject_IsTrue(v); - - /* setrange checks for idx *<* stop, so we inc by one */ - ret = setrange(self, start, stop+1, vi); - return PyLong_FromLongLong(ret); -} - -PyDoc_STRVAR(setrange_doc, -"setrange(bitarray, start, stop, val) -> int\n\ -\n\ -Sets a range in the bitarray to the given value, and returns the number\n\ -of bits set.\n\ -"); - - static PyObject * bitarray_search(bitarrayobject *self, PyObject *args) { @@ -2557,8 +2530,6 @@ bitarray_methods[] = { reverse_doc}, {"setall", (PyCFunction) bitarray_setall, METH_O, setall_doc}, - {"setrange", (PyCFunction) bitarray_setrange, METH_VARARGS, - setrange_doc}, {"setlist", (PyCFunction) bitarray_setlist, METH_VARARGS, setlist_doc}, {"search", (PyCFunction) bitarray_search, METH_VARARGS, From 5ccbdcff74cd56e6d804bd96ba0d472e85887c92 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Sun, 23 Oct 2016 
00:56:52 +0200 Subject: [PATCH 17/23] remove setrange test --- test/test_bitarray.py | 6 ------ 1 file changed, 6 deletions(-) diff --git a/test/test_bitarray.py b/test/test_bitarray.py index b6869493e..cb2507500 100644 --- a/test/test_bitarray.py +++ b/test/test_bitarray.py @@ -1348,12 +1348,6 @@ def test_search(self): self.assertRaises(ValueError, a.search, bitarray()) self.assertRaises(TypeError, a.search, '010') - def test_setrange(self): - a = bitarray('11111') - n = a.setrange(1, 3, False) - self.assertEqual(n, 3) - self.assertEqual(a, bitarray('10001')) - def test_itersearch(self): a = bitarray('10011') self.assertRaises(ValueError, a.itersearch, bitarray()) From 306a29e23d31a94cbe6a79be0332b1b43c5a2df5 Mon Sep 17 00:00:00 2001 From: Andre Merzky Date: Sun, 23 Oct 2016 01:08:01 +0200 Subject: [PATCH 18/23] doc updates --- README.rst | 14 +++++++++++--- bitarray/_bitarray.c | 3 ++- 2 files changed, 13 insertions(+), 4 deletions(-) diff --git a/README.rst b/README.rst index f33a16b2f..6ad0b91f8 100644 --- a/README.rst +++ b/README.rst @@ -462,17 +462,25 @@ Reference Reverse the order of bits in the array (in-place). -``search(bitarray, [limit])`` -> list +``search(bitarray, [limit], [pos])`` -> list Searches for the given a bitarray in self, and returns the start positions where bitarray matches self as a list. - The optional argument limits the number of search results to the integer - specified. By default, all search results are returned. + The optional `limit` argument limits the number of search results to the + integer specified. By default, all search results are returned. + The optional `pos` argument begins the search at the position specified. + By default, search begins at position 0. If no match is found until the end, + the search will wrap around until reaching the start position again. ``setall(value)`` Set all bits in the bitarray to bool(value). 
+``setlist(list, val) -> int`` + Sets the bitarray to the given value for each position given in the list, + and returns the number of bits set. + + ``sort(reverse=False)`` Sort the bits in the array (in-place). diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 1c80540f9..5aaffd344 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -1142,7 +1142,8 @@ where bitarray matches self as a list.\n\ The optional 'limit' argument limits the number of search results to the \n\ integer specified. By default, all search results are returned.\n\ The optional 'pos' argument begins the search at the position specified.\n\ -By default, search begins at position 0.\n\ +By default, search begins at position 0. If no match is found until the end,\n\ +the search will wrap around until reaching the start position again.\n\ "); From 776342ba68741ec96d1a9cb1bc376326dae971bd Mon Sep 17 00:00:00 2001 From: Eugene Toder Date: Sun, 23 Oct 2016 00:14:45 -0400 Subject: [PATCH 19/23] Fix Clang compilation warnings and error handling. * setbit() takes int, not long. * idx_t (index in bitarray) is long long, which can be bigger than Py_ssize_t. Use these types as appropriate. * Add error checking and range checking to setlist(). 
--- bitarray/_bitarray.c | 47 +++++++++++++++++++++----------------------- 1 file changed, 22 insertions(+), 25 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 5aaffd344..548a86fc4 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -529,7 +529,7 @@ search(bitarrayobject *self, bitarrayobject *xa, idx_t p) static int set_item(bitarrayobject *self, idx_t i, PyObject *v) { - long vi; + int vi; assert(0 <= i && i < self->nbits); vi = PyObject_IsTrue(v); @@ -969,7 +969,7 @@ bitarray_index(bitarrayobject *self, PyObject *args) { PyObject *x; idx_t i, start = 0, stop = -1; - long vi; + int vi; if (!PyArg_ParseTuple(args, "O|LL:index", &x, &start, &stop)) return NULL; @@ -1043,40 +1043,36 @@ static PyObject * bitarray_setlist(bitarrayobject *self, PyObject *args) { PyObject *p = NULL; /* positions to set (evals to true/false) */ - PyObject *tmp = NULL; /* positions to set (evals to true/false) */ PyObject *v = NULL; /* value to set (evals to true/false) */ int vi = 0; /* int val to set */ - idx_t i = 0; /* iteration index */ - idx_t npos = 0; /* loop size */ - idx_t pos = 0; /* loop var */ + Py_ssize_t i = 0; /* iteration index */ + Py_ssize_t npos = 0; /* loop size */ - if (!PyArg_ParseTuple(args, "OO:_setlist", &p, &v)) + if (!PyArg_ParseTuple(args, "O!O:_setlist", &PyList_Type, &p, &v)) return NULL; - if (!PyList_Check(p)) { - PyErr_SetString(PyExc_TypeError, "position list expected"); - return NULL; - } - vi = PyObject_IsTrue(v); npos = PyList_Size(p); if (npos < 0) return NULL; /* Not a list */ - for (i=0; i= self->nbits) + return PyErr_Format(PyExc_IndexError, + "bitarray index out of range"); setbit(self, pos, vi); } - return PyLong_FromLongLong(npos); +#ifdef IS_PY3K + return PyLong_FromSsize_t(npos); +#else + return PyInt_FromSsize_t(npos); +#endif } PyDoc_STRVAR(setlist_doc, @@ -1093,10 +1089,11 @@ bitarray_search(bitarrayobject *self, PyObject *args) PyObject *list = NULL; /* list of matching positions to be returned */ 
PyObject *x, *item = NULL; Py_ssize_t limit = -1; - Py_ssize_t pos = 0; + idx_t pos = 0; bitarrayobject *xa; - if (!PyArg_ParseTuple(args, "O|" PY_SSIZE_T_FMT PY_SSIZE_T_FMT ":_search", &x, &limit, &pos)) + if (!PyArg_ParseTuple(args, "O|" PY_SSIZE_T_FMT "L:_search", &x, &limit, + &pos)) return NULL; if (!bitarray_Check(x)) { @@ -1124,7 +1121,7 @@ bitarray_search(bitarrayobject *self, PyObject *args) pos++; if (item == NULL || PyList_Append(list, item) < 0) { Py_XDECREF(item); - Py_XDECREF(list); + Py_DECREF(list); return NULL; } Py_DECREF(item); @@ -1855,7 +1852,7 @@ static PyObject * bitarray_remove(bitarrayobject *self, PyObject *v) { idx_t i; - long vi; + int vi; vi = PyObject_IsTrue(v); if (vi < 0) From 0fc7fa205baddd035335d2fa57c0d8849e59374c Mon Sep 17 00:00:00 2001 From: Eugene Toder Date: Sat, 22 Oct 2016 19:40:47 -0400 Subject: [PATCH 20/23] Expose sequence methods via CPython interface tables. Use CPython interface tables instead of creating __special__ methods used by Python classes, because the former are much more efficient. 
On my simple benchmark (included) the speed difference is: == Without my change len(s) took: 0.12184501 2.928329542 1 in s took: 0.09164882 3.08188086266 s[0] took: 0.11913800 5.4947823314 s[0] = 1 took: 0.14618182 5.18284868977 del s2[-1] took: 0.11298084 2.5407810925 s[1:-1] took: 0.31020999 2.68454901087 s[-2:-1] = s1 took: 0.19746900 3.87809617456 del s2[-1:] took: 0.14702821 3.26322501442 s + s took: 0.30015993 1.7862811172 s * 2 took: 0.30452204 1.54962656327 s += s1 took: 0.12353301 2.9126216321 == With my change len(s) took: 0.04303408 1.03756545932 1 in s took: 0.03646994 1.22752842801 s[0] took: 0.03031492 1.40444473899 s[0] = 1 took: 0.03436899 1.2166845317 del s2[-1] took: 0.03327703 0.745054288063 s[1:-1] took: 0.21536398 1.76472702763 s[-2:-1] = s1 took: 0.07444501 1.46174588387 del s2[-1:] took: 0.06555700 1.45362367109 s + s took: 0.21622396 1.29087995035 s * 2 took: 0.22456384 1.14321346384 s += s1 took: 0.04029489 0.950439203248 The last column is the speed relative to list. So most methods got a 2-4x speed improvement, with 2.65x average. Most methods are still slower than corresponding list methods, so there are probably more improvements possible. 
--- bitarray/_bitarray.c | 205 ++++++++++++++++++----------------------- test/bench_bitarray.py | 31 +++++++ 2 files changed, 119 insertions(+), 117 deletions(-) create mode 100644 test/bench_bitarray.py diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 5aaffd344..f39163eff 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -785,11 +785,11 @@ IntBool_AsInt(PyObject *v) long x; if (PyBool_Check(v)) - return PyObject_IsTrue(v); + return v == Py_True; #ifndef IS_PY3K if (PyInt_Check(v)) { - x = PyInt_AsLong(v); + x = PyInt_AS_LONG(v); } else #endif @@ -903,8 +903,18 @@ slice_GetIndicesEx(PySliceObject *r, idx_t length, Implementation of API methods **************************************************************************/ -static PyObject * +static Py_ssize_t bitarray_length(bitarrayobject *self) +{ + if (self->nbits > PY_SSIZE_T_MAX) { + PyErr_Format(PyExc_OverflowError, "bitarray is too large"); + return -1; + } + return self->nbits; +} + +static PyObject * +bitarray_py_length(bitarrayobject *self) { return PyLong_FromLongLong(self->nbits); } @@ -918,14 +928,6 @@ since __len__ will fail for a bitarray object with 2^31 or more elements\n\ on a 32bit machine, whereas this method will return the correct value,\n\ on 32bit and 64bit machines."); -PyDoc_STRVAR(len_doc, -"__len__() -> int\n\ -\n\ -Return the length, i.e. number of bits stored in the bitarray.\n\ -This method will fail for a bitarray object with 2^31 or more elements\n\ -on a 32bit machine. 
Use bitarray.length() instead."); - - static PyObject * bitarray_copy(bitarrayobject *self) { @@ -1009,17 +1011,17 @@ to this method are the same iterable objects which can given to a bitarray\n\ object upon initialization."); -static PyObject * +static int bitarray_contains(bitarrayobject *self, PyObject *x) { - long res; + int res; if (IS_INT_OR_BOOL(x)) { int vi; vi = IntBool_AsInt(x); if (vi < 0) - return NULL; + return -1; res = findfirst(self, vi, 0, -1) >= 0; } else if (bitarray_Check(x)) { @@ -1027,18 +1029,11 @@ bitarray_contains(bitarrayobject *self, PyObject *x) } else { PyErr_SetString(PyExc_TypeError, "bitarray or bool expected"); - return NULL; + return -1; } - return PyBool_FromLong(res); + return res; } -PyDoc_STRVAR(contains_doc, -"__contains__(x) -> bool\n\ -\n\ -Return True if bitarray contains x, False otherwise.\n\ -The value x may be a boolean (or integer between 0 and 1), or a bitarray."); - - static PyObject * bitarray_setlist(bitarrayobject *self, PyObject *args) { @@ -1881,7 +1876,17 @@ Raises ValueError if item is not present."); /* --------- special methods ----------- */ static PyObject * -bitarray_getitem(bitarrayobject *self, PyObject *a) +bitarray_item(bitarrayobject *self, Py_ssize_t i) +{ + if (i < 0 || i >= self->nbits) { + PyErr_SetString(PyExc_IndexError, "bitarray index out of range"); + return NULL; + } + return PyBool_FromLong(GETBIT(self, i)); +} + +static PyObject * +bitarray_subscript(bitarrayobject *self, PyObject *a) { PyObject *res; idx_t start, stop, step, slicelength, j, i = 0; @@ -1915,6 +1920,18 @@ bitarray_getitem(bitarrayobject *self, PyObject *a) return NULL; } +static int +bitarray_ass_item(bitarrayobject *self, Py_ssize_t i, PyObject *v) +{ + if (i < 0 || i >= self->nbits) { + PyErr_SetString(PyExc_IndexError, "bitarray index out of range"); + return -1; + } + if (v != NULL) + return set_item(self, i, v); + return delete_n(self, i, 1); +} + /* Sets the elements, specified by slice, in self to the value(s) given 
by v which is either a bitarray or a boolean. */ @@ -1971,62 +1988,36 @@ setslice(bitarrayobject *self, PySliceObject *slice, PyObject *v) return -1; } -static PyObject * -bitarray_setitem(bitarrayobject *self, PyObject *args) +static int +bitarray_ass_subscript(bitarrayobject *self, PyObject *a, PyObject *v) { - PyObject *a, *v; idx_t i = 0; - if (!PyArg_ParseTuple(args, "OO:__setitem__", &a, &v)) - return NULL; - if (IS_INDEX(a)) { if (getIndex(a, &i) < 0) - return NULL; + return -1; if (i < 0) i += self->nbits; if (i < 0 || i >= self->nbits) { PyErr_SetString(PyExc_IndexError, "bitarray index out of range"); - return NULL; + return -1; } - if (set_item(self, i, v) < 0) - return NULL; - Py_RETURN_NONE; + if (v != NULL) + return set_item(self, i, v); + return delete_n(self, i, 1); } if (PySlice_Check(a)) { - if (setslice(self, (PySliceObject *) a, v) < 0) - return NULL; - Py_RETURN_NONE; - } - PyErr_SetString(PyExc_TypeError, "index or slice expected"); - return NULL; -} + idx_t start, stop, step, slicelength, j; -static PyObject * -bitarray_delitem(bitarrayobject *self, PyObject *a) -{ - idx_t start, stop, step, slicelength, j, i = 0; + if (v != NULL) + return setslice(self, (PySliceObject *) a, v); - if (IS_INDEX(a)) { - if (getIndex(a, &i) < 0) - return NULL; - if (i < 0) - i += self->nbits; - if (i < 0 || i >= self->nbits) { - PyErr_SetString(PyExc_IndexError, "bitarray index out of range"); - return NULL; - } - if (delete_n(self, i, 1) < 0) - return NULL; - Py_RETURN_NONE; - } - if (PySlice_Check(a)) { if (slice_GetIndicesEx((PySliceObject *) a, self->nbits, &start, &stop, &step, &slicelength) < 0) { - return NULL; + return -1; } if (slicelength == 0) - Py_RETURN_NONE; + return 0; if (step < 0) { stop = start + 1; @@ -2035,9 +2026,7 @@ bitarray_delitem(bitarrayobject *self, PyObject *a) } if (step == 1) { assert(stop - start == slicelength); - if (delete_n(self, start, slicelength) < 0) - return NULL; - Py_RETURN_NONE; + return delete_n(self, start, 
slicelength); } /* this is the only complicated part when step > 1 */ for (i = j = start; i < self->nbits; i++) @@ -2045,22 +2034,16 @@ bitarray_delitem(bitarrayobject *self, PyObject *a) setbit(self, j, GETBIT(self, i)); j++; } - if (resize(self, self->nbits - slicelength) < 0) - return NULL; - Py_RETURN_NONE; + return resize(self, self->nbits - slicelength); } PyErr_SetString(PyExc_TypeError, "index or slice expected"); - return NULL; + return -1; } -/* ---------- number methods ---------- */ - static PyObject * -bitarray_add(bitarrayobject *self, PyObject *other) +bitarray_concat(bitarrayobject *self, PyObject *other) { - PyObject *res; - - res = bitarray_copy(self); + PyObject *res = bitarray_copy(self); if (extend_dispatch((bitarrayobject *) res, other) < 0) { Py_DECREF(res); return NULL; @@ -2069,7 +2052,7 @@ bitarray_add(bitarrayobject *self, PyObject *other) } static PyObject * -bitarray_iadd(bitarrayobject *self, PyObject *other) +bitarray_inplace_concat(bitarrayobject *self, PyObject *other) { if (extend_dispatch(self, other) < 0) return NULL; @@ -2078,20 +2061,10 @@ bitarray_iadd(bitarrayobject *self, PyObject *other) } static PyObject * -bitarray_mul(bitarrayobject *self, PyObject *v) +bitarray_repeat(bitarrayobject *self, Py_ssize_t n) { - PyObject *res; - idx_t vi = 0; - - if (!IS_INDEX(v)) { - PyErr_SetString(PyExc_TypeError, - "integer value expected for bitarray repetition"); - return NULL; - } - if (getIndex(v, &vi) < 0) - return NULL; - res = bitarray_copy(self); - if (repeat((bitarrayobject *) res, vi) < 0) { + PyObject *res = bitarray_copy(self); + if (repeat((bitarrayobject *) res, n) < 0) { Py_DECREF(res); return NULL; } @@ -2099,23 +2072,35 @@ bitarray_mul(bitarrayobject *self, PyObject *v) } static PyObject * -bitarray_imul(bitarrayobject *self, PyObject *v) +bitarray_inplace_repeat(bitarrayobject *self, Py_ssize_t n) { - idx_t vi = 0; - - if (!IS_INDEX(v)) { - PyErr_SetString(PyExc_TypeError, - "integer value expected for in-place bitarray 
repetition"); - return NULL; - } - if (getIndex(v, &vi) < 0) - return NULL; - if (repeat(self, vi) < 0) + if (repeat(self, n) < 0) return NULL; Py_INCREF(self); return (PyObject *) self; } +static PySequenceMethods bitarray_as_sequence = { + (lenfunc)bitarray_length, /* sq_length */ + (binaryfunc)bitarray_concat, /* sq_concat */ + (ssizeargfunc)bitarray_repeat, /* sq_repeat */ + (ssizeargfunc)bitarray_item, /* sq_item */ + 0, /* sq_slice */ + (ssizeobjargproc)bitarray_ass_item, /* sq_ass_item */ + 0, /* sq_ass_slice */ + (objobjproc)bitarray_contains, /* sq_contains */ + (binaryfunc)bitarray_inplace_concat, /* sq_inplace_concat */ + (ssizeargfunc)bitarray_inplace_repeat /* sq_inplace_repeat */ +}; + +static PyMappingMethods bitarray_as_mapping = { + (lenfunc)bitarray_length, + (binaryfunc)bitarray_subscript, + (objobjargproc)bitarray_ass_subscript +}; + +/* ---------- number methods ---------- */ + static PyObject * bitarray_cpinvert(bitarrayobject *self) { @@ -2519,7 +2504,7 @@ bitarray_methods[] = { insert_doc}, {"invert", (PyCFunction) bitarray_invert, METH_NOARGS, invert_doc}, - {"length", (PyCFunction) bitarray_length, METH_NOARGS, + {"length", (PyCFunction) bitarray_py_length, METH_NOARGS, length_doc}, {"pack", (PyCFunction) bitarray_pack, METH_O, pack_doc}, @@ -2557,24 +2542,10 @@ bitarray_methods[] = { copy_doc}, {"__deepcopy__", (PyCFunction) bitarray_copy, METH_O, copy_doc}, - {"__len__", (PyCFunction) bitarray_length, METH_NOARGS, - len_doc}, - {"__contains__", (PyCFunction) bitarray_contains, METH_O, - contains_doc}, {"__reduce__", (PyCFunction) bitarray_reduce, METH_NOARGS, reduce_doc}, - /* slice methods */ - {"__delitem__", (PyCFunction) bitarray_delitem, METH_O, 0}, - {"__getitem__", (PyCFunction) bitarray_getitem, METH_O, 0}, - {"__setitem__", (PyCFunction) bitarray_setitem, METH_VARARGS, 0}, - /* number methods */ - {"__add__", (PyCFunction) bitarray_add, METH_O, 0}, - {"__iadd__", (PyCFunction) bitarray_iadd, METH_O, 0}, - {"__mul__", 
(PyCFunction) bitarray_mul, METH_O, 0}, - {"__rmul__", (PyCFunction) bitarray_mul, METH_O, 0}, - {"__imul__", (PyCFunction) bitarray_imul, METH_O, 0}, {"__and__", (PyCFunction) bitarray_and, METH_O, 0}, {"__or__", (PyCFunction) bitarray_or, METH_O, 0}, {"__xor__", (PyCFunction) bitarray_xor, METH_O, 0}, @@ -2945,8 +2916,8 @@ static PyTypeObject Bitarraytype = { 0, /* tp_compare */ (reprfunc) bitarray_repr, /* tp_repr */ 0, /* tp_as_number*/ - 0, /* tp_as_sequence */ - 0, /* tp_as_mapping */ + &bitarray_as_sequence, /* tp_as_sequence */ + &bitarray_as_mapping, /* tp_as_mapping */ 0, /* tp_hash */ 0, /* tp_call */ 0, /* tp_str */ diff --git a/test/bench_bitarray.py b/test/bench_bitarray.py new file mode 100644 index 000000000..c878b62ef --- /dev/null +++ b/test/bench_bitarray.py @@ -0,0 +1,31 @@ +import timeit + +def bench_sequence(): + print('Benchmarking sequence methods') + baseline = [] + for name, setup in [('list', 's = list(range(20));' + + 's1 = [1];' + + 's2 = list(range(1000000))'), + ('bitarray', 'from bitarray import bitarray;' + + 's = bitarray([0, 1]) * 10;' + + 's1 = bitarray([1]);' + + 's2 = bitarray(1000000)')]: + print('=== Testing ' + name) + for i, op in enumerate(['len(s)', '1 in s', + 's[0]', 's[0] = 1', 'del s2[-1]', + 's[1:-1]', 's[-2:-1] = s1', 'del s2[-1:]', + 's + s', 's * 2', 's += s1']): + t = min(timeit.repeat(op, setup)) + if i < len(baseline): + b = t / baseline[i] + else: + b = '' + baseline.append(t) + print('%-24s %.8f\t%s' % (op + ' took:', t, b)) + print('') + +def run(): + bench_sequence() + +if __name__ == '__main__': + run() From 7715c6664bd077c3575c487d8f657678c7768861 Mon Sep 17 00:00:00 2001 From: John-John Tedro Date: Sun, 23 Oct 2016 05:26:02 +0200 Subject: [PATCH 21/23] Introduce HAS_VECTORS and capture size variable --- bitarray/_bitarray.c | 52 +++++++++++++++++++++++--------------------- 1 file changed, 27 insertions(+), 25 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index 21abb0892..d497f28c7 
100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -4,6 +4,11 @@ Author: Ilan Schnell */ +#define HAS_VECTORS (defined(__GNUC__) || defined(__clang__)) + +#if HAS_VECTORS +typedef char vec __attribute__((vector_size(16))); +#endif #define PY_SSIZE_T_CLEAN #include "Python.h" @@ -94,22 +99,16 @@ static PyTypeObject Bitarraytype; #define GETBIT(self, i) \ ((self)->ob_item[(i) / 8] & BITMASK((self)->endian, i) ? 1 : 0) -#define V16C_SIZE 16 - -#define IS_GCC defined(__GNUC__) - -#if IS_GCC -typedef char v16c __attribute__ ((vector_size (V16C_SIZE))); - +#if HAS_VECTORS /* * Perform bitwise operation OP on 16 bytes of memory at a time. */ -#define simd_v16uc_op(A, B, OP) do { \ - v16c __a, __b; \ - memcpy(&__a, A, V16C_SIZE); \ - memcpy(&__b, B, V16C_SIZE); \ - v16c __r = __a OP __b; \ - memcpy(A, &__r, V16C_SIZE); \ +#define vector_op(A, B, OP) do { \ + vec __a, __b, __r; \ + memcpy(&__a, A, sizeof(vec)); \ + memcpy(&__b, B, sizeof(vec)); \ + __r = __a OP __b; \ + memcpy(A, &__r, sizeof(vec)); \ } while(0); #endif @@ -2050,22 +2049,25 @@ bitarray_cpinvert(bitarrayobject *self) return res; } -#if IS_GCC -#define BITWISE_FUNC_INTERNAL(SELF, OTHER, OP, OPEQ) do { \ - Py_ssize_t i = 0; \ - \ - for (; i + V16C_SIZE < Py_SIZE(SELF); i += V16C_SIZE) { \ - simd_v16uc_op((SELF)->ob_item + i, (OTHER)->ob_item + i, OP); \ - } \ - \ - for (; i < Py_SIZE(SELF); ++i) { \ - (SELF)->ob_item[i] OPEQ (OTHER)->ob_item[i]; \ - } \ +#if HAS_VECTORS +#define BITWISE_FUNC_INTERNAL(SELF, OTHER, OP, OPEQ) do { \ + Py_ssize_t i = 0; \ + const Py_ssize_t size = Py_SIZE(SELF); \ + \ + for (; i + sizeof(vec) < size; i += sizeof(vec)) { \ + vector_op((SELF)->ob_item + i, (OTHER)->ob_item + i, OP); \ + } \ + \ + for (; i < size; ++i) { \ + (SELF)->ob_item[i] OPEQ (OTHER)->ob_item[i]; \ + } \ } while(0); #else #define BITWISE_FUNC_INTERNAL(SELF, OTHER, OP, OPEQ) do { \ Py_ssize_t i; \ - for (i = 0; i < Py_SIZE(SELF); ++i) { \ + const Py_ssize_t size = Py_SIZE(SELF); \ + \ + for (i = 0; 
i < size; ++i) { \ (SELF)->ob_item[i] OPEQ (OTHER)->ob_item[i]; \ } \ } while(0); From a79dcec1c3fb46c399f60dc2ab917dc16ef6534d Mon Sep 17 00:00:00 2001 From: John-John Tedro Date: Sun, 23 Oct 2016 19:55:33 +0200 Subject: [PATCH 22/23] Delete op_type enum --- bitarray/_bitarray.c | 7 ------- 1 file changed, 7 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index d497f28c7..cd2be5ac6 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -345,13 +345,6 @@ repeat(bitarrayobject *self, idx_t n) return 0; } - -enum op_type { - OP_and, - OP_or, - OP_xor, -}; - /* set the bits from start to stop (excluding) in self to val */ static void setrange(bitarrayobject *self, idx_t start, idx_t stop, int val) From 0b09add58363ee4654cf5abd27a9029348f0dca9 Mon Sep 17 00:00:00 2001 From: John-John Tedro Date: Sun, 23 Oct 2016 20:39:33 +0200 Subject: [PATCH 23/23] Optimize loop to reduce moves --- bitarray/_bitarray.c | 24 +++++++++++++----------- 1 file changed, 13 insertions(+), 11 deletions(-) diff --git a/bitarray/_bitarray.c b/bitarray/_bitarray.c index cd2be5ac6..06fc49ec3 100644 --- a/bitarray/_bitarray.c +++ b/bitarray/_bitarray.c @@ -2043,17 +2043,19 @@ bitarray_cpinvert(bitarrayobject *self) } #if HAS_VECTORS -#define BITWISE_FUNC_INTERNAL(SELF, OTHER, OP, OPEQ) do { \ - Py_ssize_t i = 0; \ - const Py_ssize_t size = Py_SIZE(SELF); \ - \ - for (; i + sizeof(vec) < size; i += sizeof(vec)) { \ - vector_op((SELF)->ob_item + i, (OTHER)->ob_item + i, OP); \ - } \ - \ - for (; i < size; ++i) { \ - (SELF)->ob_item[i] OPEQ (OTHER)->ob_item[i]; \ - } \ +#define BITWISE_FUNC_INTERNAL(SELF, OTHER, OP, OPEQ) do { \ + Py_ssize_t i = 0; \ + const Py_ssize_t size = Py_SIZE(SELF); \ + char* self_ob_item = (SELF)->ob_item; \ + const char* other_ob_item = (OTHER)->ob_item; \ + \ + for (; i + sizeof(vec) < size; i += sizeof(vec)) { \ + vector_op(self_ob_item + i, other_ob_item + i, OP); \ + } \ + \ + for (; i < size; ++i) { \ + self_ob_item[i] OPEQ 
other_ob_item[i]; \ + } \ } while(0); #else #define BITWISE_FUNC_INTERNAL(SELF, OTHER, OP, OPEQ) do { \