In this tutorial, we'll learn how to use the Container.ds storage interface in Framework3 to store, list, retrieve, update, and delete data — first with the default local storage backend, and then with an S3-compatible backend.
In [1]:
Copied!
# Patch inspect.getsource (using dill) so that source introspection works for
# code defined inside notebook cells — needed by framework3's type-guard utilities.
from framework3.utils.patch_type_guard import patch_inspect_for_notebooks
patch_inspect_for_notebooks()
from framework3.utils.patch_type_guard import patch_inspect_for_notebooks
patch_inspect_for_notebooks()
✅ Patched inspect.getsource using dill.
In [2]:
Copied!
# Import necessary libraries:
# - Container: framework3's dependency container; Container.ds is the data-storage API
# - pandas / numpy: used below to build the sample DataFrame we store and retrieve
from framework3.container import Container
import pandas as pd
import numpy as np
# Import necessary libraries
from framework3.container import Container
import pandas as pd
import numpy as np
In [3]:
Copied!
# Load environment variables from the project's .env file. These supply the
# S3 bucket name, region, credentials, and endpoint read via os.environ
# later in this tutorial. load_dotenv returns True when the file was found
# and parsed (see Out[3] below).
from dotenv import load_dotenv
from pathlib import Path
import os
env_path = Path("../../../.env")
load_dotenv(dotenv_path=env_path)
from dotenv import load_dotenv
from pathlib import Path
import os
env_path = Path("../../../.env")
load_dotenv(dotenv_path=env_path)
Out[3]:
True
1. Storing Data¶
In [4]:
Copied!
# Create sample data: 100 rows — float column A, int column B, and a string
# column C (33 repetitions of cat/dog/bird plus one extra "cat" = 100 values).
# NOTE(review): np.random is unseeded, so A and B differ on every run.
df = pd.DataFrame(
{
"A": np.random.rand(100),
"B": np.random.randint(0, 100, 100),
"C": ["cat", "dog", "bird"] * 33 + ["cat"],
}
)
# Store the DataFrame in the active (local) backend under the key "sample_data_local"
Container.ds.save("sample_data_local", df)
print("Data stored successfully locally")
# Create sample data
df = pd.DataFrame(
{
"A": np.random.rand(100),
"B": np.random.randint(0, 100, 100),
"C": ["cat", "dog", "bird"] * 33 + ["cat"],
}
)
# Store the DataFrame
Container.ds.save("sample_data_local", df)
print("Data stored successfully locally")
* Saving in local path: cache/datasets/sample_data_local * Saved ! Data stored successfully locally
2. Listing Data¶
In [5]:
Copied!
# List the keys currently held by the active (local) storage backend
local_files = Container.ds.list()
print("Files in local storage:", local_files)
local_files = Container.ds.list()
print("Files in local storage:", local_files)
Files in local storage: ['sample_data_s3', 'sample_data', 'sample_data_local']
3. Retrieving Data¶
In [6]:
Copied!
# Load the stored object by key; the wrapper's .value attribute holds the
# original pandas DataFrame.
retrieved_df = Container.ds.load("sample_data_local")
print("Data retrieved successfully from local storage")
print(retrieved_df.value.head())
retrieved_df = Container.ds.load("sample_data_local")
print("Data retrieved successfully from local storage")
print(retrieved_df.value.head())
Data retrieved successfully from local storage * Downloading: <_io.BufferedReader name='cache/datasets/sample_data_local'> A B C 0 0.273884 59 cat 1 0.049904 84 dog 2 0.872462 70 bird 3 0.795624 34 cat 4 0.763532 42 dog
4. Updating Stored Data¶
In [7]:
Copied!
# Update the DataFrame by adding a new column D
df["D"] = np.random.choice(["X", "Y", "Z"], 100)
# Store the updated DataFrame locally — update() overwrites the object
# already saved under "sample_data_local"
Container.ds.update("sample_data_local", df)
print("Updated data stored successfully locally")
# Retrieve and display the updated DataFrame to confirm column D was persisted
updated_df = Container.ds.load("sample_data_local")
print(updated_df.value.head())
# Update the DataFrame
df["D"] = np.random.choice(["X", "Y", "Z"], 100)
# Store the updated DataFrame locally
Container.ds.update("sample_data_local", df)
print("Updated data stored successfully locally")
# Retrieve and display the updated DataFrame
updated_df = Container.ds.load("sample_data_local")
print(updated_df.value.head())
* Saving in local path: cache/datasets/sample_data_local * Saved ! Updated data stored successfully locally * Downloading: <_io.BufferedReader name='cache/datasets/sample_data_local'> A B C D 0 0.273884 59 cat Y 1 0.049904 84 dog Y 2 0.872462 70 bird Z 3 0.795624 34 cat X 4 0.763532 42 dog Z
5. Deleting Stored Data¶
In [8]:
Copied!
# Delete the stored data from local storage (removes the key written above)
Container.ds.delete("sample_data_local")
print("Data deleted successfully from local storage")
# Delete the stored data from local storage
Container.ds.delete("sample_data_local")
print("Data deleted successfully from local storage")
Data deleted successfully from local storage
In [9]:
Copied!
# Configure an S3-backed storage plugin. Every setting is read from
# environment variables (populated earlier by load_dotenv), so no
# credentials are hardcoded in the notebook.
# NOTE(review): endpoint_url presumably allows targeting an S3-compatible
# service instead of AWS itself — confirm against S3Storage's docs.
from framework3.plugins.storage import S3Storage
s3_storage = S3Storage(
bucket=os.environ.get("TEST_BUCKET_NAME"), # type: ignore
region_name=os.environ.get("REGION_NAME"), # type: ignore
access_key=os.environ.get("TEST_ACCESS_KEY"), # type: ignore
access_key_id=os.environ.get("TEST_ACCESS_KEY_ID"), # type: ignore
endpoint_url=os.environ.get("ENDPOINT_URL"),
)
from framework3.plugins.storage import S3Storage
s3_storage = S3Storage(
bucket=os.environ.get("TEST_BUCKET_NAME"), # type: ignore
region_name=os.environ.get("REGION_NAME"), # type: ignore
access_key=os.environ.get("TEST_ACCESS_KEY"), # type: ignore
access_key_id=os.environ.get("TEST_ACCESS_KEY_ID"), # type: ignore
endpoint_url=os.environ.get("ENDPOINT_URL"),
)
In [11]:
Copied!
# Swap the container's active storage backend from local to S3;
# all subsequent Container.ds calls now operate on the S3 bucket.
Container.storage = s3_storage
Container.storage = s3_storage
In [12]:
Copied!
# Store the same DataFrame through the (now S3-backed) storage API
Container.ds.save("sample_data_s3", df)
print("Data stored successfully in S3")
Container.ds.save("sample_data_s3", df)
print("Data stored successfully in S3")
- Binary prepared! - Stream ready! * Object size 8e-08 GBs Upload Complete! Data stored successfully in S3
In [13]:
Copied!
# List the object keys currently present in the S3 bucket
s3_files = Container.ds.list()
print("Files in S3 bucket:", s3_files)
s3_files = Container.ds.list()
print("Files in S3 bucket:", s3_files)
Files in S3 bucket: ['datasets/Iris X data.pkl', 'test-bucket/datasets/sample_data_s3']
In [14]:
Copied!
# Load the object back from S3; as with local storage, .value holds the DataFrame
retrieved_df = Container.ds.load("sample_data_s3")
print("Data retrieved successfully from S3")
print(retrieved_df.value.head())
retrieved_df = Container.ds.load("sample_data_s3")
print("Data retrieved successfully from S3")
print(retrieved_df.value.head())
Data retrieved successfully from S3 A B C D 0 0.273884 59 cat Y 1 0.049904 84 dog Y 2 0.872462 70 bird Z 3 0.795624 34 cat X 4 0.763532 42 dog Z
In [15]:
Copied!
# Update the DataFrame by adding a new column E
df["E"] = np.random.choice(["P", "Q", "R"], 100)
# Store the updated DataFrame in S3 — update() overwrites the object
# already saved under "sample_data_s3"
Container.ds.update("sample_data_s3", df)
print("Updated data stored successfully in S3")
# Retrieve and display the updated DataFrame to confirm column E was persisted
updated_df = Container.ds.load("sample_data_s3")
print(updated_df.value.head())
# Update the DataFrame
df["E"] = np.random.choice(["P", "Q", "R"], 100)
# Store the updated DataFrame in S3
Container.ds.update("sample_data_s3", df)
print("Updated data stored successfully in S3")
# Retrieve and display the updated DataFrame
updated_df = Container.ds.load("sample_data_s3")
print(updated_df.value.head())
- Binary prepared! - Stream ready! * Object size 8e-08 GBs Upload Complete! Updated data stored successfully in S3 A B C D E 0 0.273884 59 cat Y R 1 0.049904 84 dog Y P 2 0.872462 70 bird Z P 3 0.795624 34 cat X P 4 0.763532 42 dog Z P
In [16]:
Copied!
# Delete the stored object from S3 (cleans up the key written above)
Container.ds.delete("sample_data_s3")
print("Data deleted successfully from S3")
# Delete the stored data from S3
Container.ds.delete("sample_data_s3")
print("Data deleted successfully from S3")
Deleted! Data deleted successfully from S3