Revision 76d9365a sdk/python/arvados/arvfile.py

View differences:

--- a/sdk/python/arvados/arvfile.py
+++ b/sdk/python/arvados/arvfile.py
@@ -112,7 +112,7 @@
     def readall(self, size=2**20, num_retries=None):
         while True:
             data = self.read(size, num_retries=num_retries)
-            if data == '':
+            if len(data) == 0:
                 break
             yield data
 
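Why the end-of-file check changes: under Python 3 these read methods return bytes, and bytes never compare equal to the str '', so "data == ''" would stay False even at EOF and the loop would never terminate. A length check works for both bytes and str. A quick illustration (not part of the revision itself):

    >>> b'' == ''
    False
    >>> len(b'') == 0
    True
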
@@ -124,23 +124,23 @@
             data = [cache_data]
             self._filepos += len(cache_data)
         else:
-            data = ['']
+            data = [b'']
         data_size = len(data[-1])
-        while (data_size < size) and ('\n' not in data[-1]):
+        while (data_size < size) and (b'\n' not in data[-1]):
             next_read = self.read(2 ** 20, num_retries=num_retries)
             if not next_read:
                 break
             data.append(next_read)
             data_size += len(next_read)
-        data = ''.join(data)
+        data = b''.join(data)
         try:
-            nextline_index = data.index('\n') + 1
+            nextline_index = data.index(b'\n') + 1
         except ValueError:
             nextline_index = len(data)
         nextline_index = min(nextline_index, size)
         self._filepos -= len(data) - nextline_index
         self._readline_cache = (self.tell(), data[nextline_index:])
-        return data[:nextline_index]
+        return data[:nextline_index].decode()
 
     @_FileLikeObjectBase._before_close
     @retry_method
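This readline() hunk shows the pattern the whole revision follows: accumulate and scan raw bytes internally (b'\n', b''.join), and decode to str only at the text-facing return value. A minimal sketch of that pattern, where read_chunk is a hypothetical stand-in for the underlying byte reader:

    def readline_sketch(read_chunk):
        # Collect byte chunks until a newline appears or the source runs dry.
        data = [b'']
        while b'\n' not in data[-1]:
            chunk = read_chunk()
            if not chunk:
                break
            data.append(chunk)
        joined = b''.join(data)
        try:
            end = joined.index(b'\n') + 1
        except ValueError:
            end = len(joined)
        # Decode only at the boundary where the caller expects text.
        return joined[:end].decode()
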
@@ -175,7 +175,7 @@
             data_size += len(s)
             if data_size >= sizehint:
                 break
-        return ''.join(data).splitlines(True)
+        return b''.join(data).decode().splitlines(True)
 
     def size(self):
         raise NotImplementedError()
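readlines() applies the same rule, with one detail worth noting: the byte chunks are joined and decoded once, then split with keepends=True so each returned line keeps its terminator, matching ordinary file-object semantics:

    >>> b'a\nb\n'.decode().splitlines(True)
    ['a\n', 'b\n']
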
@@ -212,9 +212,9 @@
     def read(self, size, num_retries=None):
         """Read up to 'size' bytes from the stream, starting at the current file position"""
         if size == 0:
-            return ''
+            return b''
 
-        data = ''
+        data = b''
         available_chunks = locators_and_ranges(self.segments, self._filepos, size)
         if available_chunks:
             lr = available_chunks[0]
@@ -230,13 +230,13 @@
     def readfrom(self, start, size, num_retries=None):
         """Read up to 'size' bytes from the stream, starting at 'start'"""
         if size == 0:
-            return ''
+            return b''
 
         data = []
         for lr in locators_and_ranges(self.segments, start, size):
             data.append(self._stream.readfrom(lr.locator+lr.segment_offset, lr.segment_size,
                                               num_retries=num_retries))
-        return ''.join(data)
+        return b''.join(data)
 
     def as_manifest(self):
         segs = []
@@ -316,6 +316,8 @@
 
         """
         if self._state == _BufferBlock.WRITABLE:
+            if not isinstance(data, bytes):
+                data = data.encode()
             while (self.write_pointer+len(data)) > len(self.buffer_block):
                 new_buffer_block = bytearray(len(self.buffer_block) * 2)
                 new_buffer_block[0:self.write_pointer] = self.buffer_block[0:self.write_pointer]
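The append() hunk adds the write-side half of the pattern: normalize str input to bytes before it reaches the bytearray buffer, since assigning a str into a bytearray slice raises TypeError on Python 3. A rough sketch of the same normalize-then-grow logic, using local variables in place of the _BufferBlock fields shown above (with a floor on the doubling so an empty buffer cannot stall the loop, a case the real class presumably avoids by preallocating):

    def append_sketch(buffer, write_pointer, data):
        if not isinstance(data, bytes):
            data = data.encode()  # accept str callers, store bytes
        # Double the buffer until the new data fits (amortized O(1) appends).
        while write_pointer + len(data) > len(buffer):
            grown = bytearray(max(len(buffer) * 2, 1))
            grown[0:write_pointer] = buffer[0:write_pointer]
            buffer = grown
        buffer[write_pointer:write_pointer + len(data)] = data
        return buffer, write_pointer + len(data)
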
@@ -944,7 +946,7 @@
 
         with self.lock:
             if size == 0 or offset >= self.size():
-                return ''
+                return b''
             readsegs = locators_and_ranges(self._segments, offset, size)
             prefetch = locators_and_ranges(self._segments, offset + size, config.KEEP_BLOCK_SIZE, limit=32)
 
@@ -964,7 +966,7 @@
                 self.parent._my_block_manager().block_prefetch(lr.locator)
                 locs.add(lr.locator)
 
-        return ''.join(data)
+        return b''.join(data)
 
     def _repack_writes(self, num_retries):
         """Test if the buffer block has more data than actual segments.
@@ -1001,6 +1003,8 @@
         necessary.
 
         """
+        if not isinstance(data, bytes):
+            data = data.encode()
         if len(data) == 0:
             return
 
@@ -1157,7 +1161,7 @@
                 data.append(rd)
                 self._filepos += len(rd)
                 rd = self.arvadosfile.readfrom(self._filepos, config.KEEP_BLOCK_SIZE, num_retries)
-            return ''.join(data)
+            return b''.join(data)
         else:
             data = self.arvadosfile.readfrom(self._filepos, size, num_retries, exact=True)
             self._filepos += len(data)
