| 1 | --- a/Lib/test/test_file2k.py 2018-02-16 17:49:45.180147747 -0500 |
| 2 | +++ b/Lib/test/test_file2k.py 2018-02-16 17:51:06.870149602 -0500 |
| 3 | @@ -652,6 +652,33 @@ class FileThreadingTests(unittest.TestCa |
| 4 | self.f.writelines('') |
| 5 | self._test_close_open_io(io_func) |
| 6 | |
| 7 | + def test_iteration_torture(self): |
| 8 | + # bpo-31530 |
| 9 | + with open(self.filename, "wb") as fp: |
| 10 | + for i in xrange(2**20): |
| 11 | + fp.write(b"0"*50 + b"\n") |
| 12 | + with open(self.filename, "rb") as f: |
| 13 | + def it(): |
| 14 | + for l in f: |
| 15 | + pass |
| 16 | + self._run_workers(it, 10) |
| 17 | + |
| 18 | + def test_iteration_seek(self): |
| 19 | + # bpo-31530: Crash when concurrently seek and iterate over a file. |
| 20 | + with open(self.filename, "wb") as fp: |
| 21 | + for i in xrange(10000): |
| 22 | + fp.write(b"0"*50 + b"\n") |
| 23 | + with open(self.filename, "rb") as f: |
| 24 | + it = iter([1] + [0]*10) # one thread reads, others seek |
| 25 | + def iterate(): |
| 26 | + if next(it): |
| 27 | + for l in f: |
| 28 | + pass |
| 29 | + else: |
| 30 | + for i in xrange(100): |
| 31 | + f.seek(i*100, 0) |
| 32 | + self._run_workers(iterate, 10) |
| 33 | + |
| 34 | |
| 35 | @unittest.skipUnless(os.name == 'posix', 'test requires a posix system.') |
| 36 | class TestFileSignalEINTR(unittest.TestCase): |
| 37 | --- a/Objects/fileobject.c 2018-02-16 17:49:45.304147750 -0500 |
| 38 | +++ b/Objects/fileobject.c 2018-02-16 17:51:06.872149603 -0500 |
| 39 | @@ -430,7 +430,7 @@ close_the_file(PyFileObject *f) |
| 40 | if (f->ob_refcnt > 0) { |
| 41 | PyErr_SetString(PyExc_IOError, |
| 42 | "close() called during concurrent " |
| 43 | - "operation on the same file object."); |
| 44 | + "operation on the same file object"); |
| 45 | } else { |
| 46 | /* This should not happen unless someone is |
| 47 | * carelessly playing with the PyFileObject |
| 48 | @@ -438,7 +438,7 @@ close_the_file(PyFileObject *f) |
| 49 | * pointer. */ |
| 50 | PyErr_SetString(PyExc_SystemError, |
| 51 | "PyFileObject locking error in " |
| 52 | - "destructor (refcnt <= 0 at close)."); |
| 53 | + "destructor (refcnt <= 0 at close)"); |
| 54 | } |
| 55 | return NULL; |
| 56 | } |
| 57 | @@ -604,7 +604,12 @@ err_iterbuffered(void) |
| 58 | return NULL; |
| 59 | } |
| 60 | |
| 61 | -static void drop_readahead(PyFileObject *); |
| 62 | +static void |
| 63 | +drop_file_readahead(PyFileObject *f) |
| 64 | +{ |
| 65 | + PyMem_FREE(f->f_buf); |
| 66 | + f->f_buf = NULL; |
| 67 | +} |
| 68 | |
| 69 | /* Methods */ |
| 70 | |
| 71 | @@ -627,7 +632,7 @@ file_dealloc(PyFileObject *f) |
| 72 | Py_XDECREF(f->f_mode); |
| 73 | Py_XDECREF(f->f_encoding); |
| 74 | Py_XDECREF(f->f_errors); |
| 75 | - drop_readahead(f); |
| 76 | + drop_file_readahead(f); |
| 77 | Py_TYPE(f)->tp_free((PyObject *)f); |
| 78 | } |
| 79 | |
| 80 | @@ -762,7 +767,7 @@ file_seek(PyFileObject *f, PyObject *arg |
| 81 | |
| 82 | if (f->f_fp == NULL) |
| 83 | return err_closed(); |
| 84 | - drop_readahead(f); |
| 85 | + drop_file_readahead(f); |
| 86 | whence = 0; |
| 87 | if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence)) |
| 88 | return NULL; |
| 89 | @@ -2221,12 +2226,16 @@ static PyGetSetDef file_getsetlist[] = { |
| 90 | {0}, |
| 91 | }; |
| 92 | |
| 93 | +typedef struct { |
| 94 | + char *buf, *bufptr, *bufend; |
| 95 | +} readaheadbuffer; |
| 96 | + |
| 97 | static void |
| 98 | -drop_readahead(PyFileObject *f) |
| 99 | +drop_readaheadbuffer(readaheadbuffer *rab) |
| 100 | { |
| 101 | - if (f->f_buf != NULL) { |
| 102 | - PyMem_Free(f->f_buf); |
| 103 | - f->f_buf = NULL; |
| 104 | + if (rab->buf != NULL) { |
| 105 | + PyMem_FREE(rab->buf); |
| 106 | + rab->buf = NULL; |
| 107 | } |
| 108 | } |
| 109 | |
| 110 | @@ -2234,35 +2243,34 @@ drop_readahead(PyFileObject *f) |
| 111 | (unless at EOF) and no more than bufsize. Returns negative value on |
| 112 | error, will set MemoryError if bufsize bytes cannot be allocated. */ |
| 113 | static int |
| 114 | -readahead(PyFileObject *f, Py_ssize_t bufsize) |
| 115 | +readahead(PyFileObject *f, readaheadbuffer *rab, Py_ssize_t bufsize) |
| 116 | { |
| 117 | Py_ssize_t chunksize; |
| 118 | |
| 119 | - if (f->f_buf != NULL) { |
| 120 | - if( (f->f_bufend - f->f_bufptr) >= 1) |
| 121 | + if (rab->buf != NULL) { |
| 122 | + if ((rab->bufend - rab->bufptr) >= 1) |
| 123 | return 0; |
| 124 | else |
| 125 | - drop_readahead(f); |
| 126 | + drop_readaheadbuffer(rab); |
| 127 | } |
| 128 | - if ((f->f_buf = (char *)PyMem_Malloc(bufsize)) == NULL) { |
| 129 | + if ((rab->buf = PyMem_MALLOC(bufsize)) == NULL) { |
| 130 | PyErr_NoMemory(); |
| 131 | return -1; |
| 132 | } |
| 133 | FILE_BEGIN_ALLOW_THREADS(f) |
| 134 | errno = 0; |
| 135 | - chunksize = Py_UniversalNewlineFread( |
| 136 | - f->f_buf, bufsize, f->f_fp, (PyObject *)f); |
| 137 | + chunksize = Py_UniversalNewlineFread(rab->buf, bufsize, f->f_fp, (PyObject *)f); |
| 138 | FILE_END_ALLOW_THREADS(f) |
| 139 | if (chunksize == 0) { |
| 140 | if (ferror(f->f_fp)) { |
| 141 | PyErr_SetFromErrno(PyExc_IOError); |
| 142 | clearerr(f->f_fp); |
| 143 | - drop_readahead(f); |
| 144 | + drop_readaheadbuffer(rab); |
| 145 | return -1; |
| 146 | } |
| 147 | } |
| 148 | - f->f_bufptr = f->f_buf; |
| 149 | - f->f_bufend = f->f_buf + chunksize; |
| 150 | + rab->bufptr = rab->buf; |
| 151 | + rab->bufend = rab->buf + chunksize; |
| 152 | return 0; |
| 153 | } |
| 154 | |
| 155 | @@ -2272,45 +2280,43 @@ readahead(PyFileObject *f, Py_ssize_t bu |
| 156 | logarithmic buffer growth to about 50 even when reading a 1gb line. */ |
| 157 | |
| 158 | static PyStringObject * |
| 159 | -readahead_get_line_skip(PyFileObject *f, Py_ssize_t skip, Py_ssize_t bufsize) |
| 160 | +readahead_get_line_skip(PyFileObject *f, readaheadbuffer *rab, Py_ssize_t skip, Py_ssize_t bufsize) |
| 161 | { |
| 162 | PyStringObject* s; |
| 163 | char *bufptr; |
| 164 | char *buf; |
| 165 | Py_ssize_t len; |
| 166 | |
| 167 | - if (f->f_buf == NULL) |
| 168 | - if (readahead(f, bufsize) < 0) |
| 169 | + if (rab->buf == NULL) |
| 170 | + if (readahead(f, rab, bufsize) < 0) |
| 171 | return NULL; |
| 172 | |
| 173 | - len = f->f_bufend - f->f_bufptr; |
| 174 | + len = rab->bufend - rab->bufptr; |
| 175 | if (len == 0) |
| 176 | - return (PyStringObject *) |
| 177 | - PyString_FromStringAndSize(NULL, skip); |
| 178 | - bufptr = (char *)memchr(f->f_bufptr, '\n', len); |
| 179 | + return (PyStringObject *)PyString_FromStringAndSize(NULL, skip); |
| 180 | + bufptr = (char *)memchr(rab->bufptr, '\n', len); |
| 181 | if (bufptr != NULL) { |
| 182 | bufptr++; /* Count the '\n' */ |
| 183 | - len = bufptr - f->f_bufptr; |
| 184 | - s = (PyStringObject *) |
| 185 | - PyString_FromStringAndSize(NULL, skip + len); |
| 186 | + len = bufptr - rab->bufptr; |
| 187 | + s = (PyStringObject *)PyString_FromStringAndSize(NULL, skip + len); |
| 188 | if (s == NULL) |
| 189 | return NULL; |
| 190 | - memcpy(PyString_AS_STRING(s) + skip, f->f_bufptr, len); |
| 191 | - f->f_bufptr = bufptr; |
| 192 | - if (bufptr == f->f_bufend) |
| 193 | - drop_readahead(f); |
| 194 | + memcpy(PyString_AS_STRING(s) + skip, rab->bufptr, len); |
| 195 | + rab->bufptr = bufptr; |
| 196 | + if (bufptr == rab->bufend) |
| 197 | + drop_readaheadbuffer(rab); |
| 198 | } else { |
| 199 | - bufptr = f->f_bufptr; |
| 200 | - buf = f->f_buf; |
| 201 | - f->f_buf = NULL; /* Force new readahead buffer */ |
| 202 | + bufptr = rab->bufptr; |
| 203 | + buf = rab->buf; |
| 204 | + rab->buf = NULL; /* Force new readahead buffer */ |
| 205 | assert(len <= PY_SSIZE_T_MAX - skip); |
| 206 | - s = readahead_get_line_skip(f, skip + len, bufsize + (bufsize>>2)); |
| 207 | + s = readahead_get_line_skip(f, rab, skip + len, bufsize + (bufsize>>2)); |
| 208 | if (s == NULL) { |
| 209 | - PyMem_Free(buf); |
| 210 | + PyMem_FREE(buf); |
| 211 | return NULL; |
| 212 | } |
| 213 | memcpy(PyString_AS_STRING(s) + skip, bufptr, len); |
| 214 | - PyMem_Free(buf); |
| 215 | + PyMem_FREE(buf); |
| 216 | } |
| 217 | return s; |
| 218 | } |
| 219 | @@ -2328,7 +2334,30 @@ file_iternext(PyFileObject *f) |
| 220 | if (!f->readable) |
| 221 | return err_mode("reading"); |
| 222 | |
| 223 | - l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE); |
| 224 | + { |
| 225 | + /* |
| 226 | + Multiple threads can enter this method while the GIL is released |
| 227 | + during file read and wreak havoc on the file object's readahead |
| 228 | + buffer. To avoid dealing with cross-thread coordination issues, we |
| 229 | + cache the file buffer state locally and only set it back on the file |
| 230 | + object when we're done. |
| 231 | + */ |
| 232 | + readaheadbuffer rab = {f->f_buf, f->f_bufptr, f->f_bufend}; |
| 233 | + f->f_buf = NULL; |
| 234 | + l = readahead_get_line_skip(f, &rab, 0, READAHEAD_BUFSIZE); |
| 235 | + /* |
| 236 | + Make sure the file's internal read buffer is cleared out. This will |
| 237 | + only do anything if some other thread interleaved with us during |
| 238 | + readahead. We want to drop any changeling buffer, so we don't leak |
| 239 | + memory. We may lose data, but that's what you get for reading the same |
| 240 | + file object in multiple threads. |
| 241 | + */ |
| 242 | + drop_file_readahead(f); |
| 243 | + f->f_buf = rab.buf; |
| 244 | + f->f_bufptr = rab.bufptr; |
| 245 | + f->f_bufend = rab.bufend; |
| 246 | + } |
| 247 | + |
| 248 | if (l == NULL || PyString_GET_SIZE(l) == 0) { |
| 249 | Py_XDECREF(l); |
| 250 | return NULL; |
| 251 | @@ -2692,7 +2721,7 @@ int PyObject_AsFileDescriptor(PyObject * |
| 252 | } |
| 253 | else { |
| 254 | PyErr_SetString(PyExc_TypeError, |
| 255 | - "argument must be an int, or have a fileno() method."); |
| 256 | + "argument must be an int, or have a fileno() method"); |
| 257 | return -1; |
| 258 | } |