Skip to content

blob_storage

The blob_storage module is used to interact with Azure Blob Storage containers.

BlobStorage

A class for reading and writing data to Azure Blob Storage. It's a wrapper around the Azure SDK.

Attributes:

Name Type Description
container_client ContainerClient

The client to interact with the Azure Blob Storage container.

Source code in physical_operations_utils/azure_utils/blob_storage.py
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
class BlobStorage:
    """
    A class for reading and writing data to Azure Blob Storage. It's a wrapper around the Azure SDK.

    Attributes:
        container_client (ContainerClient): The client to interact with the Azure Blob Storage container.
    """

    def __init__(self, blob_storage_reference: str, use_sas_token: bool = True):
        """
        Initializes the BlobStorage instance.

        This method:
        1. Reads the necessary keys from a YAML file using `get_keys_yaml_file()`.
        2. Determines the authentication method based on the `use_sas_token` flag:
           - If `use_sas_token` is `True`, retrieves the SAS token from Azure Key Vault using `get_secret()`.
           - Otherwise, uses `DefaultAzureCredential()` for authentication.
        3. Initializes the `ContainerClient` for interacting with the Azure Blob Storage container.

        Args:
            blob_storage_reference (str): The reference key to look up the storage details in the YAML file.
            use_sas_token (bool, optional): Specifies whether to use a SAS token for authentication. Defaults to `True`.

        Raises:
            ValueError: If required keys ('url', 'secret' when `use_sas_token` is `True`, 'container') are missing in the YAML file.
            KeyError: If the reference key is not found in the YAML file.

        Example:
            ```python
            from physical_operations_utils.AzureUtils import BlobStorage

            # Using SAS token authentication
            blobstorage = BlobStorage("esettbackup")
            blobstorage.container_client.exists()

            # Using DefaultAzureCredential
            blobstorage = BlobStorage("esettbackup", use_sas_token=False)
            blobstorage.container_client.exists()

            some_blob_file = blobstorage.container_client.get_blob_client(
                "20250212_DATA_PACKAGE_DP_MGA_IMB_BRP_14600_M2M_6a9622420f7344e99a51a2705047610a.xml"
            )
            ```
        """
        keys = get_keys_yaml_file()
        # Guard only the reference lookup: a KeyError raised later (for example
        # while fetching the secret) must not be reported as a missing reference.
        try:
            blob_storage_keys = keys[blob_storage_reference]
        except KeyError as err:
            raise KeyError(
                f"Keys for '{blob_storage_reference}' not found in KEYS_FILE.yml"
            ) from err

        # Validate in a fixed order (url, secret, container) so the first
        # missing key reported is deterministic.
        required_keys = ["url"]
        if use_sas_token:
            required_keys.append("secret")
        required_keys.append("container")
        for key in required_keys:
            if key not in blob_storage_keys:
                raise ValueError(f"{key} key not found for '{blob_storage_reference}'")

        url = blob_storage_keys["url"]
        container = blob_storage_keys["container"]
        secret_name = None
        if use_sas_token:
            # Only the name of the secret is stored in the YAML file; the value
            # itself is resolved through get_secret().
            secret_name = blob_storage_keys["secret"]
            credential = get_secret(secret_name)
        else:
            credential = DefaultAzureCredential()

        logging.warning(
            f"Connecting to Azure Blob Storage with reference: {blob_storage_reference}"
        )
        logging.warning(f"url: {url}")
        logging.warning(f"container: {container}")
        if use_sas_token:
            # Logs the secret's name only, never the credential value.
            logging.warning(f"secret_name: {secret_name}")
        self.container_client = ContainerClient(
            account_url=url, container_name=container, credential=credential
        )

    @staticmethod
    def _build_blob_path(file_location: str, file_name: str) -> str:
        """Joins a folder and a file name into a blob path without a stray leading slash."""
        folder = file_location.strip().rstrip("/")
        # An empty (or "/"-only) location means the container root; joining it
        # with "/" would produce a blob named "/<file_name>".
        if not folder:
            return file_name
        return f"{folder}/{file_name}"

    def write_pandas_df_as_csv(
        self,
        df: pd.DataFrame,
        file_location: str,
        file_name: str,
        overwrite: bool,
        column_seperator: str = ";",
        decimal: str = ".",
        include_index: bool = False,
    ) -> None:
        """
        Writes a pandas DataFrame as a CSV file to Azure Blob Storage.

        Args:
            df (pd.DataFrame): The DataFrame to be written as a CSV file.
            file_location (str): The directory path within the Blob Storage where the file should be stored.
                Surrounding whitespace and trailing slashes are removed; an empty value stores the file at the container root.
            file_name (str): The name of the CSV file to be created.
            overwrite (bool): Whether to overwrite an existing file with the same name.
            column_seperator (str, optional): The column separator for the CSV file. Defaults to `;`.
            decimal (str, optional): The character to use for decimal points. Defaults to `.`.
            include_index (bool, optional): Whether to include the DataFrame index in the CSV. Defaults to `False`.

        Raises:
            azure.core.exceptions.ResourceExistsError: If the file already exists and overwrite is set to False.

        Example:
            ```python
            from physical_operations_utils.AzureUtils import BlobStorage
            import pandas as pd

            df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
            blobstorage = BlobStorage("esettbackup")
            blobstorage.write_pandas_df_as_csv(df, "folder", "data.csv", overwrite=True)
            ```
        """
        # Serialize in memory so nothing is written to local disk.
        csv_buffer = io.StringIO()
        df.to_csv(
            csv_buffer,
            sep=column_seperator,
            decimal=decimal,
            index=include_index,
        )
        file_path = self._build_blob_path(file_location, file_name)
        blob_client = self.container_client.get_blob_client(file_path)
        blob_client.upload_blob(csv_buffer.getvalue(), overwrite=overwrite)

    def write_text_file_to_blob(
        self,
        content: str,
        file_location: str,
        file_name: str,
        overwrite: bool,
    ) -> None:
        """
        Writes a text file to Azure Blob Storage. Can be used to write html or xml files.

        Args:
            content (str): The content of the text file.
            file_location (str): The directory path within the Blob Storage where the file should be stored.
                Surrounding whitespace and trailing slashes are removed; an empty value stores the file at the container root.
            file_name (str): The name of the text file to be created.
            overwrite (bool): Whether to overwrite an existing file with the same name.

        Raises:
            azure.core.exceptions.ResourceExistsError: If the file already exists and overwrite is set to False.

        Example:
            ```python
            from physical_operations_utils.AzureUtils import BlobStorage

            blobstorage = BlobStorage("esettbackup")
            blobstorage.write_text_file_to_blob("<html><body><h1>Hello, World!</h1></body></html>", "folder", "index.html", overwrite=True)
            ```
        """
        file_path = self._build_blob_path(file_location, file_name)
        blob_client = self.container_client.get_blob_client(file_path)
        blob_client.upload_blob(content, overwrite=overwrite)

__init__(blob_storage_reference, use_sas_token=True)

Initializes the BlobStorage instance.

This method:

1. Reads the necessary keys from a YAML file using get_keys_yaml_file().
2. Determines the authentication method based on the use_sas_token flag:
   - If use_sas_token is True, retrieves the SAS token from Azure Key Vault using get_secret().
   - Otherwise, uses DefaultAzureCredential() for authentication.
3. Initializes the ContainerClient for interacting with the Azure Blob Storage container.

Parameters:

Name Type Description Default
blob_storage_reference str

The reference key to look up the storage details in the YAML file.

required
use_sas_token bool

Specifies whether to use a SAS token for authentication. Defaults to True.

True

Raises:

Type Description
ValueError

If required keys ('url', 'secret' when use_sas_token is True, 'container') are missing in the YAML file.

KeyError

If the reference key is not found in the YAML file.

Example
from physical_operations_utils.AzureUtils import BlobStorage

# Using SAS token authentication
blobstorage = BlobStorage("esettbackup")
blobstorage.container_client.exists()

# Using DefaultAzureCredential
blobstorage = BlobStorage("esettbackup", use_sas_token=False)
blobstorage.container_client.exists()

some_blob_file = blobstorage.container_client.get_blob_client(
    "20250212_DATA_PACKAGE_DP_MGA_IMB_BRP_14600_M2M_6a9622420f7344e99a51a2705047610a.xml"
)
Source code in physical_operations_utils/azure_utils/blob_storage.py
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
def __init__(self, blob_storage_reference: str, use_sas_token: bool = True):
    """
    Initializes the BlobStorage instance.

    This method:
    1. Reads the necessary keys from a YAML file using `get_keys_yaml_file()`.
    2. Determines the authentication method based on the `use_sas_token` flag:
       - If `use_sas_token` is `True`, retrieves the SAS token from Azure Key Vault using `get_secret()`.
       - Otherwise, uses `DefaultAzureCredential()` for authentication.
    3. Initializes the `ContainerClient` for interacting with the Azure Blob Storage container.

    Args:
        blob_storage_reference (str): The reference key to look up the storage details in the YAML file.
        use_sas_token (bool, optional): Specifies whether to use a SAS token for authentication. Defaults to `True`.

    Raises:
        ValueError: If required keys ('url', 'secret' when `use_sas_token` is `True`, 'container') are missing in the YAML file.
        KeyError: If the reference key is not found in the YAML file.

    Example:
        ```python
        from physical_operations_utils.AzureUtils import BlobStorage

        # Using SAS token authentication
        blobstorage = BlobStorage("esettbackup")
        blobstorage.container_client.exists()

        # Using DefaultAzureCredential
        blobstorage = BlobStorage("esettbackup", use_sas_token=False)
        blobstorage.container_client.exists()

        some_blob_file = blobstorage.container_client.get_blob_client(
            "20250212_DATA_PACKAGE_DP_MGA_IMB_BRP_14600_M2M_6a9622420f7344e99a51a2705047610a.xml"
        )
        ```
    """
    keys = get_keys_yaml_file()
    # Guard only the reference lookup: a KeyError raised later (for example
    # while fetching the secret) must not be reported as a missing reference.
    try:
        blob_storage_keys = keys[blob_storage_reference]
    except KeyError as err:
        raise KeyError(
            f"Keys for '{blob_storage_reference}' not found in KEYS_FILE.yml"
        ) from err

    # Validate in a fixed order (url, secret, container) so the first
    # missing key reported is deterministic.
    required_keys = ["url"]
    if use_sas_token:
        required_keys.append("secret")
    required_keys.append("container")
    for key in required_keys:
        if key not in blob_storage_keys:
            raise ValueError(f"{key} key not found for '{blob_storage_reference}'")

    url = blob_storage_keys["url"]
    container = blob_storage_keys["container"]
    secret_name = None
    if use_sas_token:
        # Only the name of the secret is stored in the YAML file; the value
        # itself is resolved through get_secret().
        secret_name = blob_storage_keys["secret"]
        credential = get_secret(secret_name)
    else:
        credential = DefaultAzureCredential()

    logging.warning(
        f"Connecting to Azure Blob Storage with reference: {blob_storage_reference}"
    )
    logging.warning(f"url: {url}")
    logging.warning(f"container: {container}")
    if use_sas_token:
        # Logs the secret's name only, never the credential value.
        logging.warning(f"secret_name: {secret_name}")
    self.container_client = ContainerClient(
        account_url=url, container_name=container, credential=credential
    )

write_pandas_df_as_csv(df, file_location, file_name, overwrite, column_seperator=';', decimal='.', include_index=False)

Writes a pandas DataFrame as a CSV file to Azure Blob Storage.

Parameters:

Name Type Description Default
df DataFrame

The DataFrame to be written as a CSV file.

required
file_location str

The directory path within the Blob Storage where the file should be stored.

required
file_name str

The name of the CSV file to be created.

required
overwrite bool

Whether to overwrite an existing file with the same name.

required
column_seperator str

The column separator for the CSV file. Defaults to `;`.

';'
decimal str

The character to use for decimal points. Defaults to `.`.

'.'
include_index bool

Whether to include the DataFrame index in the CSV. Defaults to False.

False

Raises:

Type Description
Error

If the file already exists and overwrite is set to False.

Example
from physical_operations_utils.AzureUtils import BlobStorage
import pandas as pd

df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
blobstorage = BlobStorage("esettbackup")
blobstorage.write_pandas_df_as_csv(df, "folder", "data.csv", overwrite=True)
Source code in physical_operations_utils/azure_utils/blob_storage.py
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
def write_pandas_df_as_csv(
    self,
    df: pd.DataFrame,
    file_location: str,
    file_name: str,
    overwrite: bool,
    column_seperator: str = ";",
    decimal: str = ".",
    include_index: bool = False,
) -> None:
    """
    Writes a pandas DataFrame as a CSV file to Azure Blob Storage.

    Args:
        df (pd.DataFrame): The DataFrame to be written as a CSV file.
        file_location (str): The directory path within the Blob Storage where the file should be stored.
            Surrounding whitespace and trailing slashes are removed; an empty value stores the file at the container root.
        file_name (str): The name of the CSV file to be created.
        overwrite (bool): Whether to overwrite an existing file with the same name.
        column_seperator (str, optional): The column separator for the CSV file. Defaults to `;`.
        decimal (str, optional): The character to use for decimal points. Defaults to `.`.
        include_index (bool, optional): Whether to include the DataFrame index in the CSV. Defaults to `False`.

    Raises:
        azure.core.exceptions.ResourceExistsError: If the file already exists and overwrite is set to False.

    Example:
        ```python
        from physical_operations_utils.AzureUtils import BlobStorage
        import pandas as pd

        df = pd.DataFrame({"col1": [1, 2], "col2": [3, 4]})
        blobstorage = BlobStorage("esettbackup")
        blobstorage.write_pandas_df_as_csv(df, "folder", "data.csv", overwrite=True)
        ```
    """
    # Serialize in memory so nothing is written to local disk.
    csv_buffer = io.StringIO()
    df.to_csv(
        csv_buffer,
        sep=column_seperator,
        decimal=decimal,
        index=include_index,
    )
    folder = file_location.strip().rstrip("/")
    # An empty (or "/"-only) location means the container root; joining it
    # with "/" would produce a blob named "/<file_name>".
    file_path = f"{folder}/{file_name}" if folder else file_name
    blob_client = self.container_client.get_blob_client(file_path)
    blob_client.upload_blob(csv_buffer.getvalue(), overwrite=overwrite)

write_text_file_to_blob(content, file_location, file_name, overwrite)

Writes a text file to Azure Blob Storage. Can be used to write html or xml files.

Parameters:

Name Type Description Default
content str

The content of the text file.

required
file_location str

The directory path within the Blob Storage where the file should be stored.

required
file_name str

The name of the text file to be created.

required
overwrite bool

Whether to overwrite an existing file with the same name.

required

Raises:

Type Description
Error

If the file already exists and overwrite is set to False.

Example
from physical_operations_utils.AzureUtils import BlobStorage

blobstorage = BlobStorage("esettbackup")
blobstorage.write_text_file_to_blob("<html><body><h1>Hello, World!</h1></body></html>", "folder", "index.html", overwrite=True)
Source code in physical_operations_utils/azure_utils/blob_storage.py
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
def write_text_file_to_blob(
    self,
    content: str,
    file_location: str,
    file_name: str,
    overwrite: bool,
) -> None:
    """
    Writes a text file to Azure Blob Storage. Can be used to write html or xml files.

    Args:
        content (str): The content of the text file.
        file_location (str): The directory path within the Blob Storage where the file should be stored.
            Surrounding whitespace and trailing slashes are removed; an empty value stores the file at the container root.
        file_name (str): The name of the text file to be created.
        overwrite (bool): Whether to overwrite an existing file with the same name.

    Raises:
        azure.core.exceptions.ResourceExistsError: If the file already exists and overwrite is set to False.

    Example:
        ```python
        from physical_operations_utils.AzureUtils import BlobStorage

        blobstorage = BlobStorage("esettbackup")
        blobstorage.write_text_file_to_blob("<html><body><h1>Hello, World!</h1></body></html>", "folder", "index.html", overwrite=True)
        ```
    """
    folder = file_location.strip().rstrip("/")
    # An empty (or "/"-only) location means the container root; joining it
    # with "/" would produce a blob named "/<file_name>".
    file_path = f"{folder}/{file_name}" if folder else file_name
    blob_client = self.container_client.get_blob_client(file_path)
    blob_client.upload_blob(content, overwrite=overwrite)