Commit | Line | Data |
---|---|---|
3f0678fe VM |
1 | --- a/Lib/test/test_file2k.py 2018-02-16 17:49:45.180147747 -0500 |
2 | +++ b/Lib/test/test_file2k.py 2018-02-16 17:51:06.870149602 -0500 | |
3 | @@ -652,6 +652,33 @@ class FileThreadingTests(unittest.TestCa | |
4 | self.f.writelines('') | |
5 | self._test_close_open_io(io_func) | |
6 | ||
7 | + def test_iteration_torture(self): | |
8 | + # bpo-31530 | |
9 | + with open(self.filename, "wb") as fp: | |
10 | + for i in xrange(2**20): | |
11 | + fp.write(b"0"*50 + b"\n") | |
12 | + with open(self.filename, "rb") as f: | |
13 | + def it(): | |
14 | + for l in f: | |
15 | + pass | |
16 | + self._run_workers(it, 10) | |
17 | + | |
18 | + def test_iteration_seek(self): | |
19 | + # bpo-31530: Crash when concurrently seek and iterate over a file. | |
20 | + with open(self.filename, "wb") as fp: | |
21 | + for i in xrange(10000): | |
22 | + fp.write(b"0"*50 + b"\n") | |
23 | + with open(self.filename, "rb") as f: | |
24 | + it = iter([1] + [0]*10) # one thread reads, others seek | |
25 | + def iterate(): | |
26 | + if next(it): | |
27 | + for l in f: | |
28 | + pass | |
29 | + else: | |
30 | + for i in xrange(100): | |
31 | + f.seek(i*100, 0) | |
32 | + self._run_workers(iterate, 10) | |
33 | + | |
34 | ||
35 | @unittest.skipUnless(os.name == 'posix', 'test requires a posix system.') | |
36 | class TestFileSignalEINTR(unittest.TestCase): | |
37 | --- a/Objects/fileobject.c 2018-02-16 17:49:45.304147750 -0500 | |
38 | +++ b/Objects/fileobject.c 2018-02-16 17:51:06.872149603 -0500 | |
39 | @@ -430,7 +430,7 @@ close_the_file(PyFileObject *f) | |
40 | if (f->ob_refcnt > 0) { | |
41 | PyErr_SetString(PyExc_IOError, | |
42 | "close() called during concurrent " | |
43 | - "operation on the same file object."); | |
44 | + "operation on the same file object"); | |
45 | } else { | |
46 | /* This should not happen unless someone is | |
47 | * carelessly playing with the PyFileObject | |
48 | @@ -438,7 +438,7 @@ close_the_file(PyFileObject *f) | |
49 | * pointer. */ | |
50 | PyErr_SetString(PyExc_SystemError, | |
51 | "PyFileObject locking error in " | |
52 | - "destructor (refcnt <= 0 at close)."); | |
53 | + "destructor (refcnt <= 0 at close)"); | |
54 | } | |
55 | return NULL; | |
56 | } | |
57 | @@ -604,7 +604,12 @@ err_iterbuffered(void) | |
58 | return NULL; | |
59 | } | |
60 | ||
61 | -static void drop_readahead(PyFileObject *); | |
62 | +static void | |
63 | +drop_file_readahead(PyFileObject *f) | |
64 | +{ | |
65 | + PyMem_FREE(f->f_buf); | |
66 | + f->f_buf = NULL; | |
67 | +} | |
68 | ||
69 | /* Methods */ | |
70 | ||
71 | @@ -627,7 +632,7 @@ file_dealloc(PyFileObject *f) | |
72 | Py_XDECREF(f->f_mode); | |
73 | Py_XDECREF(f->f_encoding); | |
74 | Py_XDECREF(f->f_errors); | |
75 | - drop_readahead(f); | |
76 | + drop_file_readahead(f); | |
77 | Py_TYPE(f)->tp_free((PyObject *)f); | |
78 | } | |
79 | ||
80 | @@ -762,7 +767,7 @@ file_seek(PyFileObject *f, PyObject *arg | |
81 | ||
82 | if (f->f_fp == NULL) | |
83 | return err_closed(); | |
84 | - drop_readahead(f); | |
85 | + drop_file_readahead(f); | |
86 | whence = 0; | |
87 | if (!PyArg_ParseTuple(args, "O|i:seek", &offobj, &whence)) | |
88 | return NULL; | |
89 | @@ -2221,12 +2226,16 @@ static PyGetSetDef file_getsetlist[] = { | |
90 | {0}, | |
91 | }; | |
92 | ||
93 | +typedef struct { | |
94 | + char *buf, *bufptr, *bufend; | |
95 | +} readaheadbuffer; | |
96 | + | |
97 | static void | |
98 | -drop_readahead(PyFileObject *f) | |
99 | +drop_readaheadbuffer(readaheadbuffer *rab) | |
100 | { | |
101 | - if (f->f_buf != NULL) { | |
102 | - PyMem_Free(f->f_buf); | |
103 | - f->f_buf = NULL; | |
104 | + if (rab->buf != NULL) { | |
105 | + PyMem_FREE(rab->buf); | |
106 | + rab->buf = NULL; | |
107 | } | |
108 | } | |
109 | ||
110 | @@ -2234,35 +2243,34 @@ drop_readahead(PyFileObject *f) | |
111 | (unless at EOF) and no more than bufsize. Returns negative value on | |
112 | error, will set MemoryError if bufsize bytes cannot be allocated. */ | |
113 | static int | |
114 | -readahead(PyFileObject *f, Py_ssize_t bufsize) | |
115 | +readahead(PyFileObject *f, readaheadbuffer *rab, Py_ssize_t bufsize) | |
116 | { | |
117 | Py_ssize_t chunksize; | |
118 | ||
119 | - if (f->f_buf != NULL) { | |
120 | - if( (f->f_bufend - f->f_bufptr) >= 1) | |
121 | + if (rab->buf != NULL) { | |
122 | + if ((rab->bufend - rab->bufptr) >= 1) | |
123 | return 0; | |
124 | else | |
125 | - drop_readahead(f); | |
126 | + drop_readaheadbuffer(rab); | |
127 | } | |
128 | - if ((f->f_buf = (char *)PyMem_Malloc(bufsize)) == NULL) { | |
129 | + if ((rab->buf = PyMem_MALLOC(bufsize)) == NULL) { | |
130 | PyErr_NoMemory(); | |
131 | return -1; | |
132 | } | |
133 | FILE_BEGIN_ALLOW_THREADS(f) | |
134 | errno = 0; | |
135 | - chunksize = Py_UniversalNewlineFread( | |
136 | - f->f_buf, bufsize, f->f_fp, (PyObject *)f); | |
137 | + chunksize = Py_UniversalNewlineFread(rab->buf, bufsize, f->f_fp, (PyObject *)f); | |
138 | FILE_END_ALLOW_THREADS(f) | |
139 | if (chunksize == 0) { | |
140 | if (ferror(f->f_fp)) { | |
141 | PyErr_SetFromErrno(PyExc_IOError); | |
142 | clearerr(f->f_fp); | |
143 | - drop_readahead(f); | |
144 | + drop_readaheadbuffer(rab); | |
145 | return -1; | |
146 | } | |
147 | } | |
148 | - f->f_bufptr = f->f_buf; | |
149 | - f->f_bufend = f->f_buf + chunksize; | |
150 | + rab->bufptr = rab->buf; | |
151 | + rab->bufend = rab->buf + chunksize; | |
152 | return 0; | |
153 | } | |
154 | ||
155 | @@ -2272,45 +2280,43 @@ readahead(PyFileObject *f, Py_ssize_t bu | |
156 | logarithmic buffer growth to about 50 even when reading a 1gb line. */ | |
157 | ||
158 | static PyStringObject * | |
159 | -readahead_get_line_skip(PyFileObject *f, Py_ssize_t skip, Py_ssize_t bufsize) | |
160 | +readahead_get_line_skip(PyFileObject *f, readaheadbuffer *rab, Py_ssize_t skip, Py_ssize_t bufsize) | |
161 | { | |
162 | PyStringObject* s; | |
163 | char *bufptr; | |
164 | char *buf; | |
165 | Py_ssize_t len; | |
166 | ||
167 | - if (f->f_buf == NULL) | |
168 | - if (readahead(f, bufsize) < 0) | |
169 | + if (rab->buf == NULL) | |
170 | + if (readahead(f, rab, bufsize) < 0) | |
171 | return NULL; | |
172 | ||
173 | - len = f->f_bufend - f->f_bufptr; | |
174 | + len = rab->bufend - rab->bufptr; | |
175 | if (len == 0) | |
176 | - return (PyStringObject *) | |
177 | - PyString_FromStringAndSize(NULL, skip); | |
178 | - bufptr = (char *)memchr(f->f_bufptr, '\n', len); | |
179 | + return (PyStringObject *)PyString_FromStringAndSize(NULL, skip); | |
180 | + bufptr = (char *)memchr(rab->bufptr, '\n', len); | |
181 | if (bufptr != NULL) { | |
182 | bufptr++; /* Count the '\n' */ | |
183 | - len = bufptr - f->f_bufptr; | |
184 | - s = (PyStringObject *) | |
185 | - PyString_FromStringAndSize(NULL, skip + len); | |
186 | + len = bufptr - rab->bufptr; | |
187 | + s = (PyStringObject *)PyString_FromStringAndSize(NULL, skip + len); | |
188 | if (s == NULL) | |
189 | return NULL; | |
190 | - memcpy(PyString_AS_STRING(s) + skip, f->f_bufptr, len); | |
191 | - f->f_bufptr = bufptr; | |
192 | - if (bufptr == f->f_bufend) | |
193 | - drop_readahead(f); | |
194 | + memcpy(PyString_AS_STRING(s) + skip, rab->bufptr, len); | |
195 | + rab->bufptr = bufptr; | |
196 | + if (bufptr == rab->bufend) | |
197 | + drop_readaheadbuffer(rab); | |
198 | } else { | |
199 | - bufptr = f->f_bufptr; | |
200 | - buf = f->f_buf; | |
201 | - f->f_buf = NULL; /* Force new readahead buffer */ | |
202 | + bufptr = rab->bufptr; | |
203 | + buf = rab->buf; | |
204 | + rab->buf = NULL; /* Force new readahead buffer */ | |
205 | assert(len <= PY_SSIZE_T_MAX - skip); | |
206 | - s = readahead_get_line_skip(f, skip + len, bufsize + (bufsize>>2)); | |
207 | + s = readahead_get_line_skip(f, rab, skip + len, bufsize + (bufsize>>2)); | |
208 | if (s == NULL) { | |
209 | - PyMem_Free(buf); | |
210 | + PyMem_FREE(buf); | |
211 | return NULL; | |
212 | } | |
213 | memcpy(PyString_AS_STRING(s) + skip, bufptr, len); | |
214 | - PyMem_Free(buf); | |
215 | + PyMem_FREE(buf); | |
216 | } | |
217 | return s; | |
218 | } | |
219 | @@ -2328,7 +2334,30 @@ file_iternext(PyFileObject *f) | |
220 | if (!f->readable) | |
221 | return err_mode("reading"); | |
222 | ||
223 | - l = readahead_get_line_skip(f, 0, READAHEAD_BUFSIZE); | |
224 | + { | |
225 | + /* | |
226 | + Multiple threads can enter this method while the GIL is released | |
227 | + during file read and wreak havoc on the file object's readahead | |
228 | + buffer. To avoid dealing with cross-thread coordination issues, we | |
229 | + cache the file buffer state locally and only set it back on the file | |
230 | + object when we're done. | |
231 | + */ | |
232 | + readaheadbuffer rab = {f->f_buf, f->f_bufptr, f->f_bufend}; | |
233 | + f->f_buf = NULL; | |
234 | + l = readahead_get_line_skip(f, &rab, 0, READAHEAD_BUFSIZE); | |
235 | + /* | |
236 | + Make sure the file's internal read buffer is cleared out. This will | |
237 | + only do anything if some other thread interleaved with us during | |
238 | + readahead. We want to drop any changeling buffer, so we don't leak | |
239 | + memory. We may lose data, but that's what you get for reading the same | |
240 | + file object in multiple threads. | |
241 | + */ | |
242 | + drop_file_readahead(f); | |
243 | + f->f_buf = rab.buf; | |
244 | + f->f_bufptr = rab.bufptr; | |
245 | + f->f_bufend = rab.bufend; | |
246 | + } | |
247 | + | |
248 | if (l == NULL || PyString_GET_SIZE(l) == 0) { | |
249 | Py_XDECREF(l); | |
250 | return NULL; | |
251 | @@ -2692,7 +2721,7 @@ int PyObject_AsFileDescriptor(PyObject * | |
252 | } | |
253 | else { | |
254 | PyErr_SetString(PyExc_TypeError, | |
255 | - "argument must be an int, or have a fileno() method."); | |
256 | + "argument must be an int, or have a fileno() method"); | |
257 | return -1; | |
258 | } |