1 """
2 Object operations
3
4 An Object is analogous to a file on a conventional filesystem. You can
5 read data from, or write data to your Objects. You can also associate
6 arbitrary metadata with them.
7
8 See COPYING for license information.
9 """
10
11 try:
12 from hashlib import md5
13 except ImportError:
14 from md5 import md5
15 import StringIO
16 import mimetypes
17 import os
18
19 from urllib import quote
20 from errors import ResponseError, NoSuchObject, \
21 InvalidObjectName, IncompleteSend, \
22 InvalidMetaName, InvalidMetaValue
23
24 from socket import timeout
25 import consts
26 from utils import requires_name
27
28
29
30
31
32 -class Object(object):
33 """
34 Storage data representing an object, (metadata and data).
35
36 @undocumented: _make_headers
37 @undocumented: _name_check
38 @undocumented: _initialize
39 @undocumented: compute_md5sum
40 @undocumented: __get_conn_for_write
41 @ivar name: the object's name (generally treat as read-only)
42 @type name: str
43 @ivar content_type: the object's content-type (set or read)
44 @type content_type: str
45 @ivar metadata: metadata associated with the object (set or read)
46 @type metadata: dict
47 @ivar size: the object's size (cached)
48 @type size: number
49 @ivar last_modified: date and time of last file modification (cached)
50 @type last_modified: str
51 @ivar container: the object's container (generally treat as read-only)
52 @type container: L{Container}
53 """
54
# Read-only view of the checksum currently associated with the object.
objsum = property(lambda self: self._etag)


def __set_etag(self, value):
    """
    Pin the object's etag to a caller-supplied checksum.

    Besides storing the value this raises the override flag, which
    write() and send() check before clearing or recomputing the checksum.

    @param value: checksum to associate with the object
    """
    self._etag = value
    self._etag_override = True


# Reading returns the stored checksum; assigning goes through __set_etag.
etag = property(lambda self: self._etag, __set_etag)
62
def __init__(self, container, name=None, force_exists=False, object_record=None):
    """
    Storage objects rarely if ever need to be instantiated directly by the
    user.

    Instead, use the L{create_object<Container.create_object>},
    L{get_object<Container.get_object>},
    L{list_objects<Container.list_objects>} and other
    methods on its parent L{Container} object.

    @param container: the container this object lives in
    @param name: the object's name (ignored when object_record is given)
    @param force_exists: raise NoSuchObject when the remote lookup finds
        nothing
    @param object_record: optional dict carrying the keys 'name',
        'content_type', 'bytes', 'last_modified' and 'hash'; when given,
        the attributes are filled from it and no remote lookup happens
    """
    self.container = container
    self.metadata = {}
    self.last_modified = None
    self._etag_override = False

    if object_record:
        # A listing record already carries everything we need.
        self.name = object_record['name']
        self.content_type = object_record['content_type']
        self.size = object_record['bytes']
        self.last_modified = object_record['last_modified']
        self._etag = object_record['hash']
        return

    self.name = name
    self.content_type = None
    self.size = None
    self._etag = None
    # The lookup is always attempted; its result only matters when the
    # caller insists that the object must already exist.
    found = self._initialize()
    if force_exists and not found:
        raise NoSuchObject(self.name)
91
@requires_name(InvalidObjectName)
def read(self, size=-1, offset=0, hdrs=None, buffer=None, callback=None):
    """
    Read the content from the remote storage object.

    By default this method will buffer the response in memory and
    return it as a string. However, if a file-like object is passed
    in using the buffer keyword, the response will be written to it
    instead.

    A callback can be passed in for reporting on the progress of
    the download. The callback should accept two integers, the first
    will be for the amount of data written so far, the second for
    the total size of the transfer. Note: This option is only
    applicable when used in conjunction with the buffer option.

    >>> test_object.write('hello')
    >>> test_object.read()
    'hello'

    @param size: combined with offset, defines the length of data to be read
    @type size: number
    @param offset: combined with size, defines the start location to be read
    @type offset: number
    @param hdrs: an optional dict of headers to send with the request
        (never modified by this method)
    @type hdrs: dictionary
    @param buffer: an optional file-like object to write the content to
    @type buffer: file-like object
    @param callback: function to be used as a progress callback
    @type callback: callable(transferred, size)
    @rtype: str or None
    @return: a string of all data in the object, or None if a buffer is used
    @raise ResponseError: if the server answers with a non-2xx status
    """
    self._name_check()
    if size > 0:
        # Work on a copy so the Range header never leaks into the
        # dictionary owned by the caller.
        hdrs = dict(hdrs or {})
        hdrs['Range'] = 'bytes=%d-%d' % (offset, (offset + size) - 1)

    response = self.container.conn.make_request(
        'GET', path=[self.container.name, self.name], hdrs=hdrs)
    if (response.status < 200) or (response.status > 299):
        # Consume the error body before raising; its content is not used.
        response.read()
        raise ResponseError(response.status, response.reason)

    if not hasattr(buffer, 'write'):
        return response.read()

    # Stream into the supplied buffer in 8 KiB pieces, reporting progress
    # after each piece when a callback was provided.
    transferred = 0
    scratch = response.read(8192)
    while len(scratch) > 0:
        buffer.write(scratch)
        transferred += len(scratch)
        if callable(callback):
            callback(transferred, self.size)
        scratch = response.read(8192)
    return None
151
def save_to_filename(self, filename, callback=None):
    """
    Save the contents of the object to filename.

    >>> container = connection['container1']
    >>> obj = container.get_object('backup_file')
    >>> obj.save_to_filename('./backup_file')

    @param filename: name of the file
    @type filename: str
    @param callback: function to be used as a progress callback
    @type callback: callable(transferred, size)
    """
    # Open outside the try block: if open() itself fails there is no
    # handle to close, and the original error must reach the caller.
    fobj = open(filename, 'wb')
    try:
        self.read(buffer=fobj, callback=callback)
    finally:
        fobj.close()
170
@requires_name(InvalidObjectName)
def stream(self, chunksize=8192, hdrs=None):
    """
    Return a generator of the remote storage object's data.

    Warning: The HTTP response is only complete after this generator
    has raised a StopIteration. No other methods can be called until
    this has occurred.

    >>> test_object.write('hello')
    >>> test_object.stream()
    <generator object at 0xb77939cc>
    >>> '-'.join(test_object.stream(chunksize=1))
    'h-e-l-l-o'

    @param chunksize: size in bytes yielded by the generator
    @type chunksize: number
    @param hdrs: an optional dict of headers to send in the request
    @type hdrs: dict
    @rtype: str generator
    @return: a generator which yields strings as the object is downloaded
    """
    self._name_check()
    conn = self.container.conn
    response = conn.make_request(
        'GET', path=[self.container.name, self.name], hdrs=hdrs)
    if not (200 <= response.status <= 299):
        # Consume the error body before raising; its content is not used.
        response.read()
        raise ResponseError(response.status, response.reason)

    while True:
        piece = response.read(chunksize)
        if len(piece) == 0:
            break
        yield piece

    # One last read once the body is exhausted, as the final step of the
    # generator.
    response.read()
205
206 @requires_name(InvalidObjectName)
228
def __get_conn_for_write(self):
    """
    Start a PUT request for this object and return the connection.

    The request line and every header (those from _make_headers plus
    X-Auth-Token and User-Agent) are sent; the caller is expected to
    transmit the body on the returned connection and read the response.

    @return: the container connection's underlying HTTP connection
    """
    headers = self._make_headers()
    headers['X-Auth-Token'] = self.container.conn.token

    path = "/%s/%s/%s" % (self.container.conn.uri.rstrip('/'),
                          quote(self.container.name), quote(self.name))

    http = self.container.conn.connection
    http.putrequest('PUT', path)
    for hdr in headers:
        http.putheader(hdr, headers[hdr])
    http.putheader('User-Agent', consts.user_agent)
    http.endheaders()
    return http
247
248
@requires_name(InvalidObjectName)
def write(self, data='', verify=True, callback=None):
    """
    Write data to the remote storage system.

    By default, server-side verification is enabled, (verify=True), and
    end-to-end verification is performed using an md5 checksum. When
    verification is disabled, (verify=False), the etag attribute will
    be set to the value returned by the server, not one calculated
    locally. When disabling verification, there is no guarantee that
    what you think was uploaded matches what was actually stored. Use
    this option carefully. You have been warned.

    A callback can be passed in for reporting on the progress of
    the upload. The callback should accept two integers, the first
    will be for the amount of data written so far, the second for
    the total size of the transfer.

    >>> test_object = container.create_object('file.txt')
    >>> test_object.content_type = 'text/plain'
    >>> fp = open('./file.txt')
    >>> test_object.write(fp)

    @param data: the data to be written
    @type data: str or file
    @param verify: enable/disable server-side checksum verification
    @type verify: boolean
    @param callback: function to be used as a progress callback
    @type callback: callable(transferred, size)
    @raise ResponseError: if the server answers with a non-2xx status
    """
    self._name_check()
    if isinstance(data, file):
        # Flush before asking the OS for the size; a failed flush is
        # tolerated (best effort) rather than aborting the upload.
        try:
            data.flush()
        except IOError:
            pass
        self.size = int(os.fstat(data.fileno())[6])
    else:
        data = StringIO.StringIO(data)
        self.size = data.len

    # A checksum left over from an earlier state no longer describes the
    # new content, unless the caller pinned one through the etag property.
    if not self._etag_override:
        self._etag = None

    if not self.content_type:
        # Guess from the file name when there is one, else fall back to
        # a generic binary type.
        guessed = None
        if hasattr(data, 'name'):
            guessed = mimetypes.guess_type(data.name)[0]
        self.content_type = guessed or 'application/octet-stream'

    http = self.__get_conn_for_write()

    response = None
    transferred = 0
    compute_checksum = verify and not self._etag_override
    running_checksum = md5()

    buff = data.read(4096)
    try:
        while len(buff) > 0:
            http.send(buff)
            if compute_checksum:
                running_checksum.update(buff)
            # Count the chunk that was just sent *before* fetching the
            # next one, so the final report equals the total size.
            transferred += len(buff)
            if callable(callback):
                callback(transferred, self.size)
            buff = data.read(4096)
        response = http.getresponse()
        buff = response.read()
    except timeout:
        if response is not None:
            # The response arrived but reading its body timed out; try
            # once more to consume it before giving up.
            response.read()
        # Bare raise keeps the original traceback.
        raise
    else:
        if compute_checksum:
            self._etag = running_checksum.hexdigest()

    if (response.status < 200) or (response.status > 299):
        raise ResponseError(response.status, response.reason)

    # Without local verification, trust the checksum the server reports.
    if not verify:
        for hdr in response.getheaders():
            if hdr[0].lower() == 'etag':
                self._etag = hdr[1]
342
@requires_name(InvalidObjectName)
def send(self, iterable):
    """
    Write potentially transient data to the remote storage system using a
    generator or stream.

    If the object's size is not set, chunked transfer encoding will be
    used to upload the file.

    If the object's size attribute is set, it will be used as the
    Content-Length. If the generator raises StopIteration prior to yielding
    the right number of bytes, an IncompleteSend exception is raised.

    If the content_type attribute is not set then a value of
    application/octet-stream will be used.

    Server-side verification will be performed if an md5 checksum is
    assigned to the etag property before calling this method,
    otherwise no verification will be performed, (verification
    can be performed afterward though by using the etag attribute
    which is set to the value returned by the server).

    >>> test_object = container.create_object('backup.tar.gz')
    >>> pfd = os.popen('tar -czvf - ./data/', 'r')
    >>> test_object.send(pfd)

    @param iterable: stream or generator which yields the content to upload
    @type iterable: generator or stream
    @raise IncompleteSend: if size is set and fewer bytes were produced
    @raise ResponseError: if the server answers with a non-2xx status
    """
    self._name_check()

    if hasattr(iterable, 'read'):
        def file_iterator(stream):
            # The generator simply ends when read() returns nothing; an
            # explicit "raise StopIteration" would surface as a
            # RuntimeError on interpreters implementing PEP 479.
            chunk = stream.read(4095)
            while chunk:
                yield chunk
                chunk = stream.read(4095)
        iterable = file_iterator(iterable)

    # A checksum left over from an earlier state no longer describes the
    # new content, unless the caller pinned one through the etag property.
    if not self._etag_override:
        self._etag = None

    if not self.content_type:
        self.content_type = 'application/octet-stream'

    path = "/%s/%s/%s" % (self.container.conn.uri.rstrip('/'),
                          quote(self.container.name), quote(self.name))
    headers = self._make_headers()
    # Decide the framing once so the headers and the body always agree.
    chunked = self.size is None
    if chunked:
        del headers['Content-Length']
        headers['Transfer-Encoding'] = 'chunked'
    headers['X-Auth-Token'] = self.container.conn.token
    headers['User-Agent'] = consts.user_agent
    http = self.container.conn.connection
    http.putrequest('PUT', path)
    for key in headers:
        http.putheader(key, headers[key])
    http.endheaders()

    response = None
    transferred = 0
    try:
        for chunk in iterable:
            if not chunk:
                # A zero-length chunk would be written as "0\r\n\r\n",
                # which is the end-of-body marker in chunked encoding and
                # would cut the upload short.
                continue
            if chunked:
                http.send("%X\r\n" % len(chunk))
                http.send(chunk)
                http.send("\r\n")
            else:
                http.send(chunk)
            transferred += len(chunk)
        if chunked:
            http.send("0\r\n\r\n")
        elif transferred < self.size:
            raise IncompleteSend()
        response = http.getresponse()
        buff = response.read()
    except timeout:
        if response is not None:
            # The response arrived but reading its body timed out; try
            # once more to consume it before giving up.
            response.read()
        # Bare raise keeps the original traceback.
        raise

    if (response.status < 200) or (response.status > 299):
        raise ResponseError(response.status, response.reason)

    # Record the checksum reported by the server.
    for hdr in response.getheaders():
        if hdr[0].lower() == 'etag':
            self._etag = hdr[1]
434
def load_from_filename(self, filename, verify=True, callback=None):
    """
    Put the contents of the named file into remote storage.

    >>> test_object = container.create_object('file.txt')
    >>> test_object.content_type = 'text/plain'
    >>> test_object.load_from_filename('./my_file.txt')

    @param filename: path to the file
    @type filename: str
    @param verify: enable/disable server-side checksum verification
    @type verify: boolean
    @param callback: function to be used as a progress callback
    @type callback: callable(transferred, size)
    """
    fobj = open(filename, 'rb')
    try:
        self.write(fobj, verify=verify, callback=callback)
    finally:
        # Close the handle even when the upload fails.
        fobj.close()
453
def _initialize(self):
    """
    Initialize the Object with values from the remote service (if any).

    Issues a HEAD request for the object and copies the content type,
    x-object-meta-* values, etag, content length and last-modified date
    from the response headers onto this instance.

    @rtype: bool
    @return: False when the object has no name or the server answers 404,
        True once the attributes have been populated
    @raise ResponseError: for any other non-2xx status
    """
    if not self.name:
        return False

    response = self.container.conn.make_request(
        'HEAD', [self.container.name, self.name]
    )
    # Consume the (unused) body before inspecting the status.
    response.read()
    if response.status == 404:
        return False
    if (response.status < 200) or (response.status > 299):
        raise ResponseError(response.status, response.reason)

    for header, value in response.getheaders():
        lowered = header.lower()
        if lowered == 'content-type':
            self.content_type = value
        elif lowered.startswith('x-object-meta-'):
            # 14 == len('x-object-meta-'); keep only the user-chosen part.
            self.metadata[header[14:]] = value
        elif lowered == 'etag':
            # A server-provided checksum replaces any caller override.
            self._etag = value
            self._etag_override = False
        elif lowered == 'content-length':
            self.size = int(value)
        elif lowered == 'last-modified':
            self.last_modified = value
    return True
482
485
489
def _make_headers(self):
    """
    Returns a dictionary representing http headers based on the
    respective instance attributes.

    @rtype: dict
    @return: Content-Length, Content-Type, optional ETag and one
        X-Object-Meta-* entry per metadata item
    @raise InvalidMetaName: if a metadata key exceeds consts.meta_name_limit
    @raise InvalidMetaValue: if a metadata value exceeds
        consts.meta_value_limit
    """
    headers = {}
    # An unset (or zero) size is sent as a zero Content-Length.
    headers['Content-Length'] = self.size or 0
    if self._etag:
        headers['ETag'] = self._etag
    headers['Content-Type'] = self.content_type or 'application/octet-stream'

    for key, value in self.metadata.items():
        if len(key) > consts.meta_name_limit:
            raise InvalidMetaName(key)
        if len(value) > consts.meta_value_limit:
            raise InvalidMetaValue(value)
        headers['X-Object-Meta-' + key] = value
    return headers
509
@classmethod
def compute_md5sum(cls, fobj):
    """
    Given an open file object, returns the md5 hexdigest of the data.

    The file is read from its current position to the end in 4 KiB
    pieces and is then rewound to offset 0.

    @param fobj: an open, seekable file-like object
    @rtype: str
    @return: hexadecimal md5 digest of the data that was read
    """
    checksum = md5()
    buff = fobj.read(4096)
    while buff:
        checksum.update(buff)
        buff = fobj.read(4096)
    # Leave the file ready to be read again from the start.
    fobj.seek(0)
    return checksum.hexdigest()
522
def public_uri(self):
    """
    Retrieve the URI for this object, if its container is public.

    >>> container1 = connection['container1']
    >>> container1.make_public()
    >>> container1.create_object('file.txt').write('testing')
    >>> container1['file.txt'].public_uri()
    'http://c00061.cdn.cloudfiles.rackspacecloud.com/file.txt'

    @return: the public URI for this object
    @rtype: str
    """
    # Strip a trailing slash from the container URI so that exactly one
    # separator precedes the URL-quoted object name.
    base = self.container.public_uri().rstrip('/')
    return "%s/%s" % (base, quote(self.name))
538
class ObjectResults(object):
    """
    An iterable results set object for Objects.

    This class implements dictionary- and list-like interfaces.

    The raw listing records (dicts with at least a 'name' key) are kept
    as-is; Object instances are built on demand when items are accessed.
    """

    def __init__(self, container, objects=None):
        """
        @param container: the container the listed objects belong to
        @param objects: list of object records, each with a 'name' key
        @type objects: list of dict
        """
        self._objects = objects or []
        self._names = [obj['name'] for obj in self._objects]
        self.container = container

    def __getitem__(self, key):
        """
        Return the Object at an integer index, or a list of Objects for a
        slice. Defining this also makes the result set iterable.
        """
        if isinstance(key, slice):
            return [Object(self.container, object_record=record)
                    for record in self._objects[key]]
        return Object(self.container, object_record=self._objects[key])

    def __getslice__(self, i, j):
        # Still consulted for plain [i:j] slices on Python 2.
        return [Object(self.container, object_record=k)
                for k in self._objects[i:j]]

    def __contains__(self, item):
        # Accept object names (matching index() and count()) as well as
        # the raw listing records.
        return item in self._names or item in self._objects

    def __len__(self):
        return len(self._objects)

    def __repr__(self):
        return 'ObjectResults: %s objects' % len(self._objects)
    __str__ = __repr__

    def index(self, value, *args):
        """
        returns an integer for the first index of value
        """
        return self._names.index(value, *args)

    def count(self, value):
        """
        returns the number of occurrences of value
        """
        return self._names.count(value)
577
578
579