refactor: move S3 functions to separate file for clarity
This commit is contained in:
+116
@@ -0,0 +1,116 @@
|
||||
#!/usr/bin/env python3
|
||||
import os
|
||||
import boto3
|
||||
from botocore.exceptions import ClientError
|
||||
from typing import List, Optional
|
||||
|
||||
|
||||
# S3 Configuration
|
||||
# Target bucket name; taken from the S3_BUCKET_NAME environment variable,
# falling back to "math-exercises" when that variable is unset.
S3_BUCKET_NAME = os.getenv("S3_BUCKET_NAME", "math-exercises")
|
||||
|
||||
|
||||
def get_s3_client():
    """Create and return a boto3 S3 client configured from the environment.

    Reads ``S3_ACCESS_KEY``, ``S3_SECRET_KEY`` and ``S3_HOST_BASE`` (used as
    the endpoint URL) from the process environment.

    Returns:
        A boto3 S3 client bound to the configured endpoint.

    Raises:
        ValueError: If any of the three variables is unset or empty. The
            message lists the offending variable names (never their values).
    """
    settings = {
        "S3_ACCESS_KEY": os.environ.get("S3_ACCESS_KEY"),
        "S3_SECRET_KEY": os.environ.get("S3_SECRET_KEY"),
        "S3_HOST_BASE": os.environ.get("S3_HOST_BASE"),
    }

    # Report exactly which variables are absent so a misconfiguration can be
    # fixed without guessing; the original message is kept as the prefix.
    missing = [name for name, value in settings.items() if not value]
    if missing:
        raise ValueError(
            "S3 environment variables not properly set"
            f" (missing or empty: {', '.join(missing)})"
        )

    return boto3.client(
        "s3",
        aws_access_key_id=settings["S3_ACCESS_KEY"],
        aws_secret_access_key=settings["S3_SECRET_KEY"],
        endpoint_url=settings["S3_HOST_BASE"],
        region_name="us-east-1",  # Required but unused for Infomaniak
    )
|
||||
|
||||
|
||||
def create_bucket_if_not_exists(bucket_name: str):
    """Create the S3 bucket ``bucket_name`` unless it already exists.

    Existence is probed with ``head_bucket``; only a "not found" answer
    triggers ``create_bucket``.

    Args:
        bucket_name: Name of the bucket to check and, if absent, create.

    Raises:
        ClientError: If the existence check fails for any reason other than
            the bucket being absent, or if bucket creation fails.
        ValueError: If the S3 environment variables are not set (raised by
            ``get_s3_client``).
    """
    s3_client = get_s3_client()

    try:
        s3_client.head_bucket(Bucket=bucket_name)
        return  # Bucket already exists; nothing to do.
    except ClientError as e:
        # S3 error codes are strings and are not always numeric (e.g.
        # "NoSuchBucket", "AccessDenied"), so compare them as strings and
        # fall back to the HTTP status instead of calling int() on the code.
        error_code = str(e.response.get("Error", {}).get("Code", ""))
        http_status = e.response.get("ResponseMetadata", {}).get("HTTPStatusCode")
        bucket_missing = (
            error_code in ("404", "NoSuchBucket", "NotFound") or http_status == 404
        )
        if not bucket_missing:
            # Some other error
            print(f"Error checking bucket: {e}")
            raise

    # Bucket doesn't exist, create it
    try:
        s3_client.create_bucket(Bucket=bucket_name)
    except ClientError as create_error:
        print(f"Error creating bucket: {create_error}")
        raise
    print(f"Bucket {bucket_name} created successfully")
|
||||
|
||||
|
||||
def upload_to_s3(
    file_data: bytes,
    bucket_name: str,
    object_name: str,
    content_type: str = "application/pdf",
) -> bool:
    """Store ``file_data`` as object ``object_name`` in ``bucket_name``.

    Args:
        file_data: Raw bytes to upload.
        bucket_name: Destination bucket.
        object_name: Key under which the data is stored.
        content_type: MIME type recorded on the object.

    Returns:
        True when the upload succeeded, False when S3 reported a
        ``ClientError`` (the error is printed).
    """
    client = get_s3_client()
    put_kwargs = {
        "Bucket": bucket_name,
        "Key": object_name,
        "Body": file_data,
        "ContentType": content_type,
    }

    try:
        client.put_object(**put_kwargs)
    except ClientError as err:
        print(f"Error uploading to S3: {err}")
        return False
    return True
|
||||
|
||||
|
||||
def download_from_s3(bucket_name: str, object_name: str) -> Optional[bytes]:
    """Fetch the contents of object ``object_name`` from ``bucket_name``.

    Args:
        bucket_name: Bucket holding the object.
        object_name: Key of the object to read.

    Returns:
        The object's full contents, or None when S3 reported a
        ``ClientError`` (the error is printed).
    """
    client = get_s3_client()

    try:
        body = client.get_object(Bucket=bucket_name, Key=object_name)["Body"]
        payload = body.read()
    except ClientError as err:
        print(f"Error downloading from S3: {err}")
        return None
    return payload
|
||||
|
||||
|
||||
def list_objects_in_s3(bucket_name: str) -> List[dict]:
    """List the PDF objects in an S3 bucket, newest first.

    Only objects whose key ends with ``.pdf`` are returned. All result pages
    are fetched, so buckets holding more objects than a single
    ``list_objects_v2`` response can carry are listed completely.

    Args:
        bucket_name: Bucket to list.

    Returns:
        The matching object entries as returned by S3, sorted by
        ``LastModified`` in descending order. An empty list is returned when
        the bucket has no matching objects or when S3 reported a
        ``ClientError`` (the error is printed).
    """
    s3_client = get_s3_client()
    pdf_files: List[dict] = []

    try:
        # A single list_objects_v2 call is capped per response; the paginator
        # follows continuation tokens until every page has been read.
        paginator = s3_client.get_paginator("list_objects_v2")
        for page in paginator.paginate(Bucket=bucket_name):
            # "Contents" is absent from a page that holds no objects.
            pdf_files.extend(
                obj for obj in page.get("Contents", []) if obj["Key"].endswith(".pdf")
            )
    except ClientError as e:
        print(f"Error listing objects in S3: {e}")
        return []

    # Sort by last modified (newest first)
    pdf_files.sort(key=lambda x: x["LastModified"], reverse=True)
    return pdf_files
|
||||
|
||||
|
||||
def delete_from_s3(bucket_name: str, object_name: str) -> bool:
    """Remove object ``object_name`` from ``bucket_name``.

    Args:
        bucket_name: Bucket holding the object.
        object_name: Key of the object to delete.

    Returns:
        True when the delete request succeeded, False when S3 reported a
        ``ClientError`` (the error is printed).
    """
    client = get_s3_client()
    target = {"Bucket": bucket_name, "Key": object_name}

    try:
        client.delete_object(**target)
    except ClientError as err:
        print(f"Error deleting from S3: {err}")
        return False
    return True
|
||||
Reference in New Issue
Block a user