Skip to content

Commit 72c1267

Browse files
committed
view, partition, split
1 parent 26d0314 commit 72c1267

File tree

1 file changed

+215
-0
lines changed

1 file changed

+215
-0
lines changed

Objects/bytesviewobject.c

Lines changed: 215 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3,6 +3,7 @@
33
#include "Python.h"
44
#include "pycore_abstract.h" // _PyIndex_Check()
55
#include "pycore_bytes_methods.h" // _Py_bytes_contains()
6+
#include "pycore_bytesobject.h" // _PyBytes_Find
67
#include "pycore_long.h" // _PyLong_FromUnsignedChar()
78

89
PyDoc_STRVAR(bytesview_doc,
@@ -355,13 +356,227 @@ bytesview_decode(PyBytesViewObject *self, PyObject *args, PyObject *kwargs)
355356
return PyUnicode_Decode(self->ptr, self->len, encoding, errors);
356357
}
357358

359+
static PyObject *
360+
bytesview_find(PyBytesViewObject *self, PyObject *args)
361+
{
362+
PyObject *subobj;
363+
Py_ssize_t start = 0, end = PY_SSIZE_T_MAX;
364+
if (!PyArg_ParseTuple(args, "O|nn:find", &subobj, &start, &end)) {
365+
return NULL;
366+
}
367+
368+
const char *sub;
369+
Py_ssize_t sublen;
370+
Py_buffer view;
371+
int needs_release = 0;
372+
373+
if (PyBytes_Check(subobj)) {
374+
sub = PyBytes_AS_STRING(subobj);
375+
sublen = PyBytes_GET_SIZE(subobj);
376+
} else if (PyBytesView_Check(subobj)) {
377+
sub = ((PyBytesViewObject *)subobj)->ptr;
378+
sublen = ((PyBytesViewObject *)subobj)->len;
379+
} else if (PyObject_GetBuffer(subobj, &view, PyBUF_SIMPLE) == 0) {
380+
sub = (const char *)view.buf;
381+
sublen = view.len;
382+
needs_release = 1;
383+
} else {
384+
PyErr_SetString(PyExc_TypeError, "expected bytes-like object");
385+
return NULL;
386+
}
387+
388+
if (start < 0) start += self->len;
389+
if (start < 0) start = 0;
390+
if (end < 0) end += self->len;
391+
if (end > self->len) end = self->len;
392+
393+
Py_ssize_t result = -1;
394+
if (start <= end && sublen <= end - start) {
395+
result = _PyBytes_Find(self->ptr + start, end - start, sub, sublen, start);
396+
}
397+
398+
if (needs_release) {
399+
PyBuffer_Release(&view);
400+
}
401+
return PyLong_FromSsize_t(result);
402+
}
403+
404+
static PyObject *
405+
bytesview_partition(PyBytesViewObject *self, PyObject *sepobj)
406+
{
407+
const char *sep;
408+
Py_ssize_t seplen;
409+
Py_buffer view;
410+
int needs_release = 0;
411+
412+
if (PyBytes_Check(sepobj)) {
413+
sep = PyBytes_AS_STRING(sepobj);
414+
seplen = PyBytes_GET_SIZE(sepobj);
415+
} else if (PyBytesView_Check(sepobj)) {
416+
sep = ((PyBytesViewObject *)sepobj)->ptr;
417+
seplen = ((PyBytesViewObject *)sepobj)->len;
418+
} else if (PyObject_GetBuffer(sepobj, &view, PyBUF_SIMPLE) == 0) {
419+
sep = (const char *)view.buf;
420+
seplen = view.len;
421+
needs_release = 1;
422+
} else {
423+
PyErr_SetString(PyExc_TypeError, "expected bytes-like object");
424+
return NULL;
425+
}
426+
427+
if (seplen == 0) {
428+
if (needs_release) PyBuffer_Release(&view);
429+
PyErr_SetString(PyExc_ValueError, "empty separator");
430+
return NULL;
431+
}
432+
433+
// Find separator using optimized fastsearch
434+
Py_ssize_t pos = _PyBytes_Find(self->ptr, self->len, sep, seplen, 0);
435+
436+
if (needs_release) {
437+
PyBuffer_Release(&view);
438+
}
439+
440+
PyObject *before, *middle, *after;
441+
if (pos >= 0) {
442+
before = bytesview_new_from_owner(&PyBytesView_Type, self->owner,
443+
self->ptr, pos);
444+
middle = bytesview_new_from_owner(&PyBytesView_Type, self->owner,
445+
self->ptr + pos, seplen);
446+
after = bytesview_new_from_owner(&PyBytesView_Type, self->owner,
447+
self->ptr + pos + seplen,
448+
self->len - pos - seplen);
449+
} else {
450+
before = bytesview_new_from_owner(&PyBytesView_Type, self->owner,
451+
self->ptr, self->len);
452+
middle = bytesview_new_from_owner(&PyBytesView_Type, self->owner,
453+
self->ptr + self->len, 0);
454+
after = bytesview_new_from_owner(&PyBytesView_Type, self->owner,
455+
self->ptr + self->len, 0);
456+
}
457+
458+
if (!before || !middle || !after) {
459+
Py_XDECREF(before);
460+
Py_XDECREF(middle);
461+
Py_XDECREF(after);
462+
return NULL;
463+
}
464+
465+
return PyTuple_Pack(3, before, middle, after);
466+
}
467+
468+
static PyObject *
469+
bytesview_split(PyBytesViewObject *self, PyObject *args, PyObject *kwargs)
470+
{
471+
PyObject *sepobj = Py_None;
472+
Py_ssize_t maxsplit = -1;
473+
static char *kwlist[] = {"sep", "maxsplit", NULL};
474+
475+
if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|On:split", kwlist,
476+
&sepobj, &maxsplit)) {
477+
return NULL;
478+
}
479+
480+
if (sepobj == Py_None) {
481+
// Whitespace splitting not implemented - fall back to bytes
482+
PyObject *bytes = PyBytes_FromStringAndSize(self->ptr, self->len);
483+
if (!bytes) return NULL;
484+
PyObject *result = PyObject_CallMethod(bytes, "split", NULL);
485+
Py_DECREF(bytes);
486+
return result;
487+
}
488+
489+
const char *sep;
490+
Py_ssize_t seplen;
491+
Py_buffer view;
492+
int needs_release = 0;
493+
494+
if (PyBytes_Check(sepobj)) {
495+
sep = PyBytes_AS_STRING(sepobj);
496+
seplen = PyBytes_GET_SIZE(sepobj);
497+
} else if (PyBytesView_Check(sepobj)) {
498+
sep = ((PyBytesViewObject *)sepobj)->ptr;
499+
seplen = ((PyBytesViewObject *)sepobj)->len;
500+
} else if (PyObject_GetBuffer(sepobj, &view, PyBUF_SIMPLE) == 0) {
501+
sep = (const char *)view.buf;
502+
seplen = view.len;
503+
needs_release = 1;
504+
} else {
505+
PyErr_SetString(PyExc_TypeError, "expected bytes-like object");
506+
return NULL;
507+
}
508+
509+
if (seplen == 0) {
510+
if (needs_release) PyBuffer_Release(&view);
511+
PyErr_SetString(PyExc_ValueError, "empty separator");
512+
return NULL;
513+
}
514+
515+
PyObject *list = PyList_New(0);
516+
if (!list) {
517+
if (needs_release) PyBuffer_Release(&view);
518+
return NULL;
519+
}
520+
521+
const char *start = self->ptr;
522+
const char *end = self->ptr + self->len;
523+
Py_ssize_t splits = 0;
524+
525+
while (start <= end - seplen) {
526+
if (maxsplit >= 0 && splits >= maxsplit) {
527+
break;
528+
}
529+
Py_ssize_t pos = _PyBytes_Find(start, end - start, sep, seplen, 0);
530+
if (pos < 0) {
531+
break;
532+
}
533+
const char *match = start + pos;
534+
PyObject *part = bytesview_new_from_owner(
535+
&PyBytesView_Type, self->owner, start, match - start);
536+
if (!part || PyList_Append(list, part) < 0) {
537+
Py_XDECREF(part);
538+
Py_DECREF(list);
539+
if (needs_release) PyBuffer_Release(&view);
540+
return NULL;
541+
}
542+
Py_DECREF(part);
543+
start = match + seplen;
544+
splits++;
545+
}
546+
547+
// Add final part
548+
PyObject *part = bytesview_new_from_owner(
549+
&PyBytesView_Type, self->owner, start, end - start);
550+
if (!part || PyList_Append(list, part) < 0) {
551+
Py_XDECREF(part);
552+
Py_DECREF(list);
553+
if (needs_release) PyBuffer_Release(&view);
554+
return NULL;
555+
}
556+
Py_DECREF(part);
557+
558+
if (needs_release) {
559+
PyBuffer_Release(&view);
560+
}
561+
return list;
562+
}
563+
358564
static PyMethodDef bytesview_methods[] = {
359565
{"__bytes__", (PyCFunction)bytesview_bytes, METH_NOARGS,
360566
PyDoc_STR("Return a bytes object containing the bytesview data.")},
361567
{"tobytes", (PyCFunction)bytesview_bytes, METH_NOARGS,
362568
PyDoc_STR("Return a bytes object containing the bytesview data.")},
363569
{"decode", (PyCFunction)bytesview_decode, METH_VARARGS | METH_KEYWORDS,
364570
bytesview_decode_doc},
571+
{"find", (PyCFunction)bytesview_find, METH_VARARGS,
572+
PyDoc_STR("find(sub[, start[, end]]) -> int\n\n"
573+
"Return lowest index where sub is found, or -1 if not found.")},
574+
{"partition", (PyCFunction)bytesview_partition, METH_O,
575+
PyDoc_STR("partition(sep) -> (before, sep, after)\n\n"
576+
"Partition into three bytesview objects (zero-copy).")},
577+
{"split", (PyCFunction)bytesview_split, METH_VARARGS | METH_KEYWORDS,
578+
PyDoc_STR("split(sep=None, maxsplit=-1) -> list of bytesview\n\n"
579+
"Split into list of bytesview objects (zero-copy).")},
365580
{NULL, NULL}
366581
};
367582

0 commit comments

Comments
 (0)