|
3 | 3 | #include "Python.h" |
4 | 4 | #include "pycore_abstract.h" // _PyIndex_Check() |
5 | 5 | #include "pycore_bytes_methods.h" // _Py_bytes_contains() |
| 6 | +#include "pycore_bytesobject.h" // _PyBytes_Find |
6 | 7 | #include "pycore_long.h" // _PyLong_FromUnsignedChar() |
7 | 8 |
|
8 | 9 | PyDoc_STRVAR(bytesview_doc, |
@@ -355,13 +356,227 @@ bytesview_decode(PyBytesViewObject *self, PyObject *args, PyObject *kwargs) |
355 | 356 | return PyUnicode_Decode(self->ptr, self->len, encoding, errors); |
356 | 357 | } |
357 | 358 |
|
| 359 | +static PyObject * |
| 360 | +bytesview_find(PyBytesViewObject *self, PyObject *args) |
| 361 | +{ |
| 362 | + PyObject *subobj; |
| 363 | + Py_ssize_t start = 0, end = PY_SSIZE_T_MAX; |
| 364 | + if (!PyArg_ParseTuple(args, "O|nn:find", &subobj, &start, &end)) { |
| 365 | + return NULL; |
| 366 | + } |
| 367 | + |
| 368 | + const char *sub; |
| 369 | + Py_ssize_t sublen; |
| 370 | + Py_buffer view; |
| 371 | + int needs_release = 0; |
| 372 | + |
| 373 | + if (PyBytes_Check(subobj)) { |
| 374 | + sub = PyBytes_AS_STRING(subobj); |
| 375 | + sublen = PyBytes_GET_SIZE(subobj); |
| 376 | + } else if (PyBytesView_Check(subobj)) { |
| 377 | + sub = ((PyBytesViewObject *)subobj)->ptr; |
| 378 | + sublen = ((PyBytesViewObject *)subobj)->len; |
| 379 | + } else if (PyObject_GetBuffer(subobj, &view, PyBUF_SIMPLE) == 0) { |
| 380 | + sub = (const char *)view.buf; |
| 381 | + sublen = view.len; |
| 382 | + needs_release = 1; |
| 383 | + } else { |
| 384 | + PyErr_SetString(PyExc_TypeError, "expected bytes-like object"); |
| 385 | + return NULL; |
| 386 | + } |
| 387 | + |
| 388 | + if (start < 0) start += self->len; |
| 389 | + if (start < 0) start = 0; |
| 390 | + if (end < 0) end += self->len; |
| 391 | + if (end > self->len) end = self->len; |
| 392 | + |
| 393 | + Py_ssize_t result = -1; |
| 394 | + if (start <= end && sublen <= end - start) { |
| 395 | + result = _PyBytes_Find(self->ptr + start, end - start, sub, sublen, start); |
| 396 | + } |
| 397 | + |
| 398 | + if (needs_release) { |
| 399 | + PyBuffer_Release(&view); |
| 400 | + } |
| 401 | + return PyLong_FromSsize_t(result); |
| 402 | +} |
| 403 | + |
| 404 | +static PyObject * |
| 405 | +bytesview_partition(PyBytesViewObject *self, PyObject *sepobj) |
| 406 | +{ |
| 407 | + const char *sep; |
| 408 | + Py_ssize_t seplen; |
| 409 | + Py_buffer view; |
| 410 | + int needs_release = 0; |
| 411 | + |
| 412 | + if (PyBytes_Check(sepobj)) { |
| 413 | + sep = PyBytes_AS_STRING(sepobj); |
| 414 | + seplen = PyBytes_GET_SIZE(sepobj); |
| 415 | + } else if (PyBytesView_Check(sepobj)) { |
| 416 | + sep = ((PyBytesViewObject *)sepobj)->ptr; |
| 417 | + seplen = ((PyBytesViewObject *)sepobj)->len; |
| 418 | + } else if (PyObject_GetBuffer(sepobj, &view, PyBUF_SIMPLE) == 0) { |
| 419 | + sep = (const char *)view.buf; |
| 420 | + seplen = view.len; |
| 421 | + needs_release = 1; |
| 422 | + } else { |
| 423 | + PyErr_SetString(PyExc_TypeError, "expected bytes-like object"); |
| 424 | + return NULL; |
| 425 | + } |
| 426 | + |
| 427 | + if (seplen == 0) { |
| 428 | + if (needs_release) PyBuffer_Release(&view); |
| 429 | + PyErr_SetString(PyExc_ValueError, "empty separator"); |
| 430 | + return NULL; |
| 431 | + } |
| 432 | + |
| 433 | + // Find separator using optimized fastsearch |
| 434 | + Py_ssize_t pos = _PyBytes_Find(self->ptr, self->len, sep, seplen, 0); |
| 435 | + |
| 436 | + if (needs_release) { |
| 437 | + PyBuffer_Release(&view); |
| 438 | + } |
| 439 | + |
| 440 | + PyObject *before, *middle, *after; |
| 441 | + if (pos >= 0) { |
| 442 | + before = bytesview_new_from_owner(&PyBytesView_Type, self->owner, |
| 443 | + self->ptr, pos); |
| 444 | + middle = bytesview_new_from_owner(&PyBytesView_Type, self->owner, |
| 445 | + self->ptr + pos, seplen); |
| 446 | + after = bytesview_new_from_owner(&PyBytesView_Type, self->owner, |
| 447 | + self->ptr + pos + seplen, |
| 448 | + self->len - pos - seplen); |
| 449 | + } else { |
| 450 | + before = bytesview_new_from_owner(&PyBytesView_Type, self->owner, |
| 451 | + self->ptr, self->len); |
| 452 | + middle = bytesview_new_from_owner(&PyBytesView_Type, self->owner, |
| 453 | + self->ptr + self->len, 0); |
| 454 | + after = bytesview_new_from_owner(&PyBytesView_Type, self->owner, |
| 455 | + self->ptr + self->len, 0); |
| 456 | + } |
| 457 | + |
| 458 | + if (!before || !middle || !after) { |
| 459 | + Py_XDECREF(before); |
| 460 | + Py_XDECREF(middle); |
| 461 | + Py_XDECREF(after); |
| 462 | + return NULL; |
| 463 | + } |
| 464 | + |
| 465 | + return PyTuple_Pack(3, before, middle, after); |
| 466 | +} |
| 467 | + |
| 468 | +static PyObject * |
| 469 | +bytesview_split(PyBytesViewObject *self, PyObject *args, PyObject *kwargs) |
| 470 | +{ |
| 471 | + PyObject *sepobj = Py_None; |
| 472 | + Py_ssize_t maxsplit = -1; |
| 473 | + static char *kwlist[] = {"sep", "maxsplit", NULL}; |
| 474 | + |
| 475 | + if (!PyArg_ParseTupleAndKeywords(args, kwargs, "|On:split", kwlist, |
| 476 | + &sepobj, &maxsplit)) { |
| 477 | + return NULL; |
| 478 | + } |
| 479 | + |
| 480 | + if (sepobj == Py_None) { |
| 481 | + // Whitespace splitting not implemented - fall back to bytes |
| 482 | + PyObject *bytes = PyBytes_FromStringAndSize(self->ptr, self->len); |
| 483 | + if (!bytes) return NULL; |
| 484 | + PyObject *result = PyObject_CallMethod(bytes, "split", NULL); |
| 485 | + Py_DECREF(bytes); |
| 486 | + return result; |
| 487 | + } |
| 488 | + |
| 489 | + const char *sep; |
| 490 | + Py_ssize_t seplen; |
| 491 | + Py_buffer view; |
| 492 | + int needs_release = 0; |
| 493 | + |
| 494 | + if (PyBytes_Check(sepobj)) { |
| 495 | + sep = PyBytes_AS_STRING(sepobj); |
| 496 | + seplen = PyBytes_GET_SIZE(sepobj); |
| 497 | + } else if (PyBytesView_Check(sepobj)) { |
| 498 | + sep = ((PyBytesViewObject *)sepobj)->ptr; |
| 499 | + seplen = ((PyBytesViewObject *)sepobj)->len; |
| 500 | + } else if (PyObject_GetBuffer(sepobj, &view, PyBUF_SIMPLE) == 0) { |
| 501 | + sep = (const char *)view.buf; |
| 502 | + seplen = view.len; |
| 503 | + needs_release = 1; |
| 504 | + } else { |
| 505 | + PyErr_SetString(PyExc_TypeError, "expected bytes-like object"); |
| 506 | + return NULL; |
| 507 | + } |
| 508 | + |
| 509 | + if (seplen == 0) { |
| 510 | + if (needs_release) PyBuffer_Release(&view); |
| 511 | + PyErr_SetString(PyExc_ValueError, "empty separator"); |
| 512 | + return NULL; |
| 513 | + } |
| 514 | + |
| 515 | + PyObject *list = PyList_New(0); |
| 516 | + if (!list) { |
| 517 | + if (needs_release) PyBuffer_Release(&view); |
| 518 | + return NULL; |
| 519 | + } |
| 520 | + |
| 521 | + const char *start = self->ptr; |
| 522 | + const char *end = self->ptr + self->len; |
| 523 | + Py_ssize_t splits = 0; |
| 524 | + |
| 525 | + while (start <= end - seplen) { |
| 526 | + if (maxsplit >= 0 && splits >= maxsplit) { |
| 527 | + break; |
| 528 | + } |
| 529 | + Py_ssize_t pos = _PyBytes_Find(start, end - start, sep, seplen, 0); |
| 530 | + if (pos < 0) { |
| 531 | + break; |
| 532 | + } |
| 533 | + const char *match = start + pos; |
| 534 | + PyObject *part = bytesview_new_from_owner( |
| 535 | + &PyBytesView_Type, self->owner, start, match - start); |
| 536 | + if (!part || PyList_Append(list, part) < 0) { |
| 537 | + Py_XDECREF(part); |
| 538 | + Py_DECREF(list); |
| 539 | + if (needs_release) PyBuffer_Release(&view); |
| 540 | + return NULL; |
| 541 | + } |
| 542 | + Py_DECREF(part); |
| 543 | + start = match + seplen; |
| 544 | + splits++; |
| 545 | + } |
| 546 | + |
| 547 | + // Add final part |
| 548 | + PyObject *part = bytesview_new_from_owner( |
| 549 | + &PyBytesView_Type, self->owner, start, end - start); |
| 550 | + if (!part || PyList_Append(list, part) < 0) { |
| 551 | + Py_XDECREF(part); |
| 552 | + Py_DECREF(list); |
| 553 | + if (needs_release) PyBuffer_Release(&view); |
| 554 | + return NULL; |
| 555 | + } |
| 556 | + Py_DECREF(part); |
| 557 | + |
| 558 | + if (needs_release) { |
| 559 | + PyBuffer_Release(&view); |
| 560 | + } |
| 561 | + return list; |
| 562 | +} |
| 563 | + |
358 | 564 | static PyMethodDef bytesview_methods[] = { |
359 | 565 | {"__bytes__", (PyCFunction)bytesview_bytes, METH_NOARGS, |
360 | 566 | PyDoc_STR("Return a bytes object containing the bytesview data.")}, |
361 | 567 | {"tobytes", (PyCFunction)bytesview_bytes, METH_NOARGS, |
362 | 568 | PyDoc_STR("Return a bytes object containing the bytesview data.")}, |
363 | 569 | {"decode", (PyCFunction)bytesview_decode, METH_VARARGS | METH_KEYWORDS, |
364 | 570 | bytesview_decode_doc}, |
| 571 | + {"find", (PyCFunction)bytesview_find, METH_VARARGS, |
| 572 | + PyDoc_STR("find(sub[, start[, end]]) -> int\n\n" |
| 573 | + "Return lowest index where sub is found, or -1 if not found.")}, |
| 574 | + {"partition", (PyCFunction)bytesview_partition, METH_O, |
| 575 | + PyDoc_STR("partition(sep) -> (before, sep, after)\n\n" |
| 576 | + "Partition into three bytesview objects (zero-copy).")}, |
| 577 | + {"split", (PyCFunction)bytesview_split, METH_VARARGS | METH_KEYWORDS, |
| 578 | + PyDoc_STR("split(sep=None, maxsplit=-1) -> list of bytesview\n\n" |
| 579 | + "Split into list of bytesview objects (zero-copy).")}, |
365 | 580 | {NULL, NULL} |
366 | 581 | }; |
367 | 582 |
|
|
0 commit comments