--- a/python/libxml.c 2025-04-26 02:57:30.000000000 +0200 +++ b/python/libxml.c 2025-04-26 03:45:26.710195828 +0200 @@ -286,7 +286,9 @@ #endif file = (PyObject *) context; if (file == NULL) return(-1); - ret = PyEval_CallMethod(file, (char *) "read", (char *) "(i)", len); + /* When read() returns a string, the length is in characters not bytes, so + request at most len / 4 characters to leave space for UTF-8 encoding. */ + ret = PyObject_CallMethod(file, (char *) "read", (char *) "(i)", len / 4); if (ret == NULL) { printf("xmlPythonFileReadRaw: result is NULL\n"); return(-1); @@ -321,10 +323,12 @@ Py_DECREF(ret); return(-1); } - if (lenread > len) - memcpy(buffer, data, len); - else - memcpy(buffer, data, lenread); + if (lenread < 0 || lenread > len) { + printf("xmlPythonFileReadRaw: invalid lenread\n"); + Py_DECREF(ret); + return(-1); + } + memcpy(buffer, data, lenread); Py_DECREF(ret); return(lenread); } @@ -351,7 +355,9 @@ #endif file = (PyObject *) context; if (file == NULL) return(-1); - ret = PyEval_CallMethod(file, (char *) "io_read", (char *) "(i)", len); + /* When io_read() returns a string, the length is in characters not bytes, so + request at most len / 4 characters to leave space for UTF-8 encoding. */ + ret = PyObject_CallMethod(file, (char *) "io_read", (char *) "(i)", len / 4); if (ret == NULL) { printf("xmlPythonFileRead: result is NULL\n"); return(-1); @@ -386,10 +392,12 @@ Py_DECREF(ret); return(-1); } - if (lenread > len) - memcpy(buffer, data, len); - else - memcpy(buffer, data, lenread); + if (lenread < 0 || lenread > len) { + printf("xmlPythonFileRead: invalid lenread\n"); + Py_DECREF(ret); + return(-1); + } + memcpy(buffer, data, lenread); Py_DECREF(ret); return(lenread); }