|
20 | 20 | import logging
|
21 | 21 | import datetime
|
22 | 22 |
|
23 |
| -from azure.storage.blob import blockblobservice |
| 23 | +from azure.storage.blob import BlobServiceClient |
24 | 24 |
|
25 | 25 | from luigi.format import get_default_format
|
26 | 26 | from luigi.target import FileAlreadyExists, FileSystem, AtomicLocalFile, FileSystemTarget
|
@@ -62,60 +62,101 @@ def __init__(self, account_name=None, account_key=None, sas_token=None, **kwargs
|
62 | 62 | * `custom_domain` - The custom domain to use. This can be set in the Azure Portal. For example, ‘www.mydomain.com’.
|
63 | 63 | * `token_credential` - A token credential used to authenticate HTTPS requests. The token value should be updated before its expiration.
|
64 | 64 | """
|
65 |
| - self.options = {"account_name": account_name, "account_key": account_key, "sas_token": sas_token} |
| 65 | + if kwargs.get("custom_domain"): |
| 66 | + account_url = "{protocol}://{custom_domain}/{account_name}".format(protocol=kwargs.get("protocol", "https"), |
| 67 | + custom_domain=kwargs.get("custom_domain"), |
| 68 | + account_name=account_name) |
| 69 | + else: |
| 70 | + account_url = "{protocol}://{account_name}.blob.{endpoint_suffix}".format(protocol=kwargs.get("protocol", |
| 71 | + "https"), |
| 72 | + account_name=account_name, |
| 73 | + endpoint_suffix=kwargs.get( |
| 74 | + "endpoint_suffix", |
| 75 | + "core.windows.net")) |
| 76 | + |
| 77 | + self.options = { |
| 78 | + "account_name": account_name, |
| 79 | + "account_key": account_key, |
| 80 | + "account_url": account_url, |
| 81 | + "sas_token": sas_token} |
66 | 82 | self.kwargs = kwargs
|
67 | 83 |
|
68 | 84 | @property
|
69 | 85 | def connection(self):
|
70 |
| - return blockblobservice.BlockBlobService(account_name=self.options.get("account_name"), |
71 |
| - account_key=self.options.get("account_key"), |
72 |
| - sas_token=self.options.get("sas_token"), |
73 |
| - protocol=self.kwargs.get("protocol"), |
74 |
| - connection_string=self.kwargs.get("connection_string"), |
75 |
| - endpoint_suffix=self.kwargs.get("endpoint_suffix"), |
76 |
| - custom_domain=self.kwargs.get("custom_domain"), |
77 |
| - is_emulated=self.kwargs.get("is_emulated") or False) |
| 86 | + if self.kwargs.get("connection_string"): |
| 87 | + return BlobServiceClient.from_connection_string(conn_str=self.kwargs.get("connection_string"), |
| 88 | + **self.kwargs) |
| 89 | + else: |
| 90 | + return BlobServiceClient(account_url=self.options.get("account_url"), |
| 91 | + credential=self.options.get("account_key") or self.options.get("sas_token"), |
| 92 | + **self.kwargs) |
| 93 | + |
| 94 | + def container_client(self, container_name): |
| 95 | + return self.connection.get_container_client(container_name) |
| 96 | + |
| 97 | + def blob_client(self, container_name, blob_name): |
| 98 | + container_client = self.container_client(container_name) |
| 99 | + return container_client.get_blob_client(blob_name) |
78 | 100 |
|
79 | 101 | def upload(self, tmp_path, container, blob, **kwargs):
|
80 | 102 | logging.debug("Uploading file '{tmp_path}' to container '{container}' and blob '{blob}'".format(
|
81 | 103 | tmp_path=tmp_path, container=container, blob=blob))
|
82 | 104 | self.create_container(container)
|
83 |
| - lease_id = self.connection.acquire_blob_lease(container, blob)\ |
84 |
| - if self.exists("{container}/{blob}".format(container=container, blob=blob)) else None |
| 105 | + lease = None |
| 106 | + blob_client = self.blob_client(container, blob) |
| 107 | + if blob_client.exists(): |
| 108 | + lease = blob_client.acquire_lease() |
85 | 109 | try:
|
86 |
| - self.connection.create_blob_from_path(container, blob, tmp_path, lease_id=lease_id, progress_callback=kwargs.get("progress_callback")) |
| 110 | + with open(tmp_path, 'rb') as data: |
| 111 | + blob_client.upload_blob(data, |
| 112 | + overwrite=True, |
| 113 | + lease=lease, |
| 114 | + progress_hook=kwargs.get("progress_callback")) |
87 | 115 | finally:
|
88 |
| - if lease_id is not None: |
89 |
| - self.connection.release_blob_lease(container, blob, lease_id) |
| 116 | + if lease is not None: |
| 117 | + lease.release() |
90 | 118 |
|
91 | 119 | def download_as_bytes(self, container, blob, bytes_to_read=None):
|
92 |
| - start_range, end_range = (0, bytes_to_read-1) if bytes_to_read is not None else (None, None) |
93 | 120 | logging.debug("Downloading from container '{container}' and blob '{blob}' as bytes".format(
|
94 | 121 | container=container, blob=blob))
|
95 |
| - return self.connection.get_blob_to_bytes(container, blob, start_range=start_range, end_range=end_range).content |
| 122 | + blob_client = self.blob_client(container, blob) |
| 123 | + download_stream = blob_client.download_blob(offset=0, length=bytes_to_read) if bytes_to_read \ |
| 124 | + else blob_client.download_blob() |
| 125 | + return download_stream.readall() |
96 | 126 |
|
97 | 127 | def download_as_file(self, container, blob, location):
|
98 | 128 | logging.debug("Downloading from container '{container}' and blob '{blob}' to {location}".format(
|
99 | 129 | container=container, blob=blob, location=location))
|
100 |
| - return self.connection.get_blob_to_path(container, blob, location) |
| 130 | + blob_client = self.blob_client(container, blob) |
| 131 | + with open(location, 'wb') as file: |
| 132 | + download_stream = blob_client.download_blob() |
| 133 | + file.write(download_stream.readall()) |
| 134 | + return blob_client.get_blob_properties() |
101 | 135 |
|
102 | 136 | def create_container(self, container_name):
|
103 |
| - return self.connection.create_container(container_name) |
| 137 | + if not self.exists(container_name): |
| 138 | + return self.connection.create_container(container_name) |
104 | 139 |
|
105 | 140 | def delete_container(self, container_name):
|
106 |
| - lease_id = self.connection.acquire_container_lease(container_name) |
107 |
| - self.connection.delete_container(container_name, lease_id=lease_id) |
| 141 | + container_client = self.container_client(container_name) |
| 142 | + lease = container_client.acquire_lease() |
| 143 | + container_client.delete_container(lease=lease) |
108 | 144 |
|
109 | 145 | def exists(self, path):
|
110 | 146 | container, blob = self.splitfilepath(path)
|
111 |
| - return self.connection.exists(container, blob) |
| 147 | + if blob is None: |
| 148 | + return self.container_client(container).exists() |
| 149 | + else: |
| 150 | + return self.blob_client(container, blob).exists() |
112 | 151 |
|
113 | 152 | def remove(self, path, recursive=True, skip_trash=True):
|
114 |
| - container, blob = self.splitfilepath(path) |
115 | 153 | if not self.exists(path):
|
116 | 154 | return False
|
117 |
| - lease_id = self.connection.acquire_blob_lease(container, blob) |
118 |
| - self.connection.delete_blob(container, blob, lease_id=lease_id) |
| 155 | + |
| 156 | + container, blob = self.splitfilepath(path) |
| 157 | + blob_client = self.blob_client(container, blob) |
| 158 | + lease = blob_client.acquire_lease() |
| 159 | + blob_client.delete_blob(lease=lease) |
119 | 160 | return True
|
120 | 161 |
|
121 | 162 | def mkdir(self, path, parents=True, raise_if_exists=False):
|
@@ -148,16 +189,18 @@ def copy(self, path, dest):
|
148 | 189 | source_container=source_container, dest_container=dest_container
|
149 | 190 | ))
|
150 | 191 |
|
151 |
| - source_lease_id = self.connection.acquire_blob_lease(source_container, source_blob) |
152 |
| - destination_lease_id = self.connection.acquire_blob_lease(dest_container, dest_blob) if self.exists(dest) else None |
| 192 | + source_blob_client = self.blob_client(source_container, source_blob) |
| 193 | + dest_blob_client = self.blob_client(dest_container, dest_blob) |
| 194 | + source_lease = source_blob_client.acquire_lease() |
| 195 | + destination_lease = dest_blob_client.acquire_lease() if self.exists(dest) else None |
153 | 196 | try:
|
154 |
| - return self.connection.copy_blob(source_container, dest_blob, self.connection.make_blob_url( |
155 |
| - source_container, source_blob), |
156 |
| - destination_lease_id=destination_lease_id, source_lease_id=source_lease_id) |
| 197 | + return dest_blob_client.start_copy_from_url(source_url=source_blob_client.url, |
| 198 | + source_lease=source_lease, |
| 199 | + destination_lease=destination_lease) |
157 | 200 | finally:
|
158 |
| - self.connection.release_blob_lease(source_container, source_blob, source_lease_id) |
159 |
| - if destination_lease_id is not None: |
160 |
| - self.connection.release_blob_lease(dest_container, dest_blob, destination_lease_id) |
| 201 | + source_lease.release() |
| 202 | + if destination_lease is not None: |
| 203 | + destination_lease.release() |
161 | 204 |
|
162 | 205 | def rename_dont_move(self, path, dest):
|
163 | 206 | self.move(path, dest)
|
|
0 commit comments