Add an output-size limit to brotli.Decompressor.process().

process() gains an optional max_output_length argument. Input that the
decoder has not consumed when the limit is reached is kept on the
Decompressor object, and the new can_accept_more_data() method reports
whether such input is still pending.

NOTE(review): the whitespace of context lines was reconstructed; verify
them against brotli-1.0.9/python/_brotli.cc before applying.

diff -urN brotli-1.0.9/python/_brotli.cc brotli-1.0.9-modified/python/_brotli.cc
--- brotli-1.0.9/python/_brotli.cc 2020-08-27 19:42:55.000000000 +0530
+++ brotli-1.0.9-modified/python/_brotli.cc 2025-12-31 09:12:07.925323925 +0530
@@ -432,10 +432,14 @@
 typedef struct {
   PyObject_HEAD
   BrotliDecoderState* dec;
+  uint8_t* unconsumed_data;
+  size_t unconsumed_data_length;
 } brotli_Decompressor;
 
 static void brotli_Decompressor_dealloc(brotli_Decompressor* self) {
   BrotliDecoderDestroyInstance(self->dec);
+  /* free(NULL) is a no-op, so the tail needs no guard. */
+  free(self->unconsumed_data);
   #if PY_MAJOR_VERSION >= 3
   Py_TYPE(self)->tp_free((PyObject*)self);
   #else
@@ -451,6 +455,12 @@
     self->dec = BrotliDecoderCreateInstance(0, 0, 0);
   }
 
+  /* self may be NULL here if allocation failed - TODO confirm against the
+     lines above this hunk. */
+  if (self != NULL) {
+    self->unconsumed_data = NULL;
+    self->unconsumed_data_length = 0;
+  }
+
   return (PyObject *)self;
 }
 
@@ -469,6 +479,100 @@
   return 0;
 }
 
+/* Feeds `input` to the decoder owned by `self` and returns the produced bytes,
+   stopping once `max_output_length` bytes have been produced. Returns NULL
+   with a Python exception set on failure. Input the decoder has not consumed
+   yet is copied into self->unconsumed_data for a later call. `input` may point
+   at self->unconsumed_data itself, so the old tail is freed only after the new
+   one has been copied. */
+static PyObject* decompress_stream(brotli_Decompressor* self,
+    uint8_t* input, size_t input_length, Py_ssize_t max_output_length) {
+  BrotliDecoderResult result;
+
+  size_t available_in = input_length;
+  const uint8_t* next_in = input;
+
+  size_t available_out;
+  uint8_t* next_out;
+  uint8_t* new_tail = NULL;
+  BlocksOutputBuffer buffer = {.list=NULL};
+  PyObject *ret;
+
+  /* Cap the buffer at max_output_length; the limit check in the loop below
+     compares buffer.allocated against that same value. */
+  if (BlocksOutputBuffer_InitAndGrow(&buffer, max_output_length, &available_out, &next_out) < 0) {
+    goto error;
+  }
+
+  while (1) {
+    Py_BEGIN_ALLOW_THREADS
+    result = BrotliDecoderDecompressStream(self->dec,
+                                           &available_in, &next_in,
+                                           &available_out, &next_out, NULL);
+    Py_END_ALLOW_THREADS
+
+    if (result == BROTLI_DECODER_RESULT_NEEDS_MORE_OUTPUT) {
+      if (available_out == 0) {
+        if (buffer.allocated == PY_SSIZE_T_MAX) {
+          PyErr_SetString(PyExc_MemoryError, unable_allocate_msg);
+          goto error;
+        }
+        if (buffer.allocated == max_output_length) {
+          // We've reached the output length limit.
+          break;
+        }
+        if (BlocksOutputBuffer_Grow(&buffer, &available_out, &next_out) < 0) {
+          goto error;
+        }
+      }
+      continue;
+    }
+
+    if (result == BROTLI_DECODER_RESULT_ERROR || available_in != 0) {
+      /* Drop any unread input so no tail is stored for a failed call. */
+      available_in = 0;
+      goto error;
+    }
+
+    break;
+  }
+
+  ret = BlocksOutputBuffer_Finish(&buffer, available_out);
+  if (ret != NULL) {
+    goto finally;
+  }
+
+error:
+  BlocksOutputBuffer_OnError(&buffer);
+  /* A NULL return needs an exception; keep one that is already pending. */
+  if (!PyErr_Occurred()) {
+    PyErr_SetString(BrotliError,
+        "BrotliDecoderDecompressStream failed while processing the stream");
+  }
+  ret = NULL;
+
+finally:
+  if (available_in > 0) {
+    /* malloc returns void*, which C++ does not convert implicitly. */
+    new_tail = static_cast<uint8_t*>(malloc(available_in));
+    if (new_tail == NULL) {
+      /* The unread input cannot be kept: fail instead of copying into NULL. */
+      Py_XDECREF(ret);
+      ret = NULL;
+      PyErr_NoMemory();
+      available_in = 0;
+    } else {
+      memcpy(new_tail, next_in, available_in);
+    }
+  }
+  /* free(NULL) is a no-op, so the old tail needs no guard. */
+  free(self->unconsumed_data);
+  self->unconsumed_data = new_tail;
+  self->unconsumed_data_length = available_in;
+
+  return ret;
+}
+
 PyDoc_STRVAR(brotli_Decompressor_process_doc,
 "Process \"string\" for decompression, returning a string that contains \n"
 "decompressed output data.  This data should be concatenated to the output \n"
@@ -476,29 +580,47 @@
 "Some or all of the input may be kept in internal buffers for later \n"
 "processing, and the decompressed output data may be empty until enough input \n"
 "has been accumulated.\n"
+"If max_output_length is set, no more than max_output_length bytes will be\n"
+"returned. If the limit is reached, further calls to process (potentially with\n"
+"empty input) will continue to yield more data. If, after returning a string of\n"
+"the length equal to limit, can_accept_more_data() returns False, process()\n"
+"must only be called with empty input until can_accept_more_data() once again\n"
+"returns True.\n"
 "\n"
 "Signature:\n"
-"  decompress(string)\n"
+"  process(string, max_output_length=int)\n"
 "\n"
 "Args:\n"
 "  string (bytes): The input data\n"
+"  max_output_length (int, optional): Upper bound on the number of bytes\n"
+"      returned by this call. Unlimited when omitted.\n"
 "\n"
 "Returns:\n"
 "  The decompressed output data (bytes)\n"
 "\n"
 "Raises:\n"
 "  brotli.error: If decompression fails\n");
 
-static PyObject* brotli_Decompressor_process(brotli_Decompressor *self, PyObject *args) {
+static PyObject* brotli_Decompressor_process(brotli_Decompressor *self, PyObject *args, PyObject* keywds) {
   PyObject* ret = NULL;
   std::vector<uint8_t> output;
   Py_buffer input;
   BROTLI_BOOL ok = BROTLI_TRUE;
 
+  Py_ssize_t max_output_length = PY_SSIZE_T_MAX;
+  uint8_t* data;
+  size_t data_length;
+
+  /* const char*: a string literal does not convert to char* in C++11. The
+     empty first name marks the input as positional-only (Python 3.6+). */
+  static const char* kwlist[] = { "", "max_output_length", NULL };
+
 #if PY_MAJOR_VERSION >= 3
-  ok = (BROTLI_BOOL)PyArg_ParseTuple(args, "y*:process", &input);
+  ok = (BROTLI_BOOL)PyArg_ParseTupleAndKeywords(args, keywds, "y*|n:process",
+      const_cast<char**>(kwlist), &input, &max_output_length);
 #else
-  ok = (BROTLI_BOOL)PyArg_ParseTuple(args, "s*:process", &input);
+  ok = (BROTLI_BOOL)PyArg_ParseTupleAndKeywords(args, keywds, "s*|n:process",
+      const_cast<char**>(kwlist), &input, &max_output_length);
 #endif
 
   if (!ok)
@@ -509,7 +631,28 @@
     goto end;
   }
 
-  ok = decompress_stream(self->dec, &output, static_cast<uint8_t*>(input.buf), input.len);
+  if (self->unconsumed_data_length > 0) {
+    /* A previous call left input unconsumed; it must be drained before new
+       data is accepted. */
+    if (input.len > 0) {
+      PyErr_SetString(BrotliError, "process called with data when accept_more_data is False");
+      PyBuffer_Release(&input);
+      return NULL;
+    }
+    data = self->unconsumed_data;
+    data_length = self->unconsumed_data_length;
+  } else {
+    data = static_cast<uint8_t*>(input.buf);
+    data_length = input.len;
+  }
+
+  ret = decompress_stream(self, data, data_length, max_output_length);
+
+  /* Return from here instead of falling through to `end:`. The code after that
+     label is not part of this patch, so it cannot be relied on to keep `ret`;
+     this function also has no `finally` label to jump to. */
+  PyBuffer_Release(&input);
+  return ret;
 
 end:
   PyBuffer_Release(&input);
@@ -522,6 +665,28 @@
   return ret;
 }
 
+PyDoc_STRVAR(brotli_Decompressor_can_accept_more_data_doc,
+"Checks if the decoder instance can accept more compressed data. If process()\n"
+"was never called on this decompressor with max_output_length, this method\n"
+"will always return True.\n"
+"\n"
+"Signature:\n"
+"  can_accept_more_data()\n"
+"\n"
+"Returns:\n"
+"  True if the decoder is ready to accept more compressed data via process()\n"
+"  False if the decoder needs to output some data via process(b'') before\n"
+"  being provided any more compressed data\n");
+
+static PyObject* brotli_Decompressor_can_accept_more_data(brotli_Decompressor* self) {
+  /* Stored unconsumed input means process() has to drain it first. */
+  if (self->unconsumed_data_length > 0) {
+    Py_RETURN_FALSE;
+  }
+  Py_RETURN_TRUE;
+}
+
+
 PyDoc_STRVAR(brotli_Decompressor_is_finished_doc,
 "Checks if decoder instance reached the final state.\n"
 "\n"
@@ -568,8 +733,9 @@
 };
 
 static PyMethodDef brotli_Decompressor_methods[] = {
-  {"process", (PyCFunction)brotli_Decompressor_process, METH_VARARGS, brotli_Decompressor_process_doc},
+  {"process", (PyCFunction)brotli_Decompressor_process, METH_VARARGS | METH_KEYWORDS, brotli_Decompressor_process_doc},
   {"is_finished", (PyCFunction)brotli_Decompressor_is_finished, METH_NOARGS, brotli_Decompressor_is_finished_doc},
+  {"can_accept_more_data", (PyCFunction)brotli_Decompressor_can_accept_more_data, METH_NOARGS, brotli_Decompressor_can_accept_more_data_doc},
   {NULL}  /* Sentinel */
 };
 