Build a PyTorch dataset for model training from an Avala annotation export.
Prerequisites
pip install avala torch torchvision pillow
Export Annotations
from avala import Client
import time

client = Client()  # reads AVALA_API_KEY from the environment

# Create an export for your project
export = client.exports.create(project="proj_abc123")

# Poll until ready. Bound the wait so a stalled or failed export does not
# leave the script spinning forever.
deadline = time.monotonic() + 600  # 10-minute cap; tune for your project size
while export.status != "completed":
    if time.monotonic() > deadline:
        raise TimeoutError(f"Export {export.uid} did not complete in time")
    time.sleep(2)
    export = client.exports.get(export.uid)

print(f"Download: {export.download_url}")
Load into a PyTorch Dataset
import json
import requests
from pathlib import Path
from PIL import Image
from torch.utils.data import Dataset, DataLoader
import torchvision.transforms as T
class AvalaDataset(Dataset):
    """PyTorch dataset backed by an Avala JSON export.

    Each item is ``(image, target)`` where ``target`` maps ``"boxes"`` to a
    list of ``[x1, y1, x2, y2]`` coordinates and ``"labels"`` to the
    corresponding label values from the export.

    NOTE(review): boxes are emitted in the ORIGINAL image's pixel
    coordinates, while the default transform resizes the image to 224x224
    without rescaling them — confirm this matches what the training code
    expects before using the default transform for detection.
    """

    def __init__(self, export_path: str, images_dir: str, transform=None):
        with open(export_path) as fh:
            self.annotations = json.load(fh)
        self.images_dir = Path(images_dir)
        if transform:
            self.transform = transform
        else:
            # ImageNet statistics — matches torchvision pretrained backbones.
            self.transform = T.Compose([
                T.Resize((224, 224)),
                T.ToTensor(),
                T.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
            ])

    def __len__(self):
        return len(self.annotations)

    def __getitem__(self, idx):
        record = self.annotations[idx]
        img = Image.open(self.images_dir / record["file_name"]).convert("RGB")
        img = self.transform(img)

        # Keep only bounding-box shapes; other annotation types are ignored.
        box_objs = [
            obj for obj in record.get("annotations", [])
            if obj["type"] == "bounding_box"
        ]
        # Convert (x, y, width, height) into corner form [x1, y1, x2, y2].
        boxes = [
            [c["x"], c["y"], c["x"] + c["width"], c["y"] + c["height"]]
            for c in (obj["coordinates"] for obj in box_objs)
        ]
        labels = [obj["label"] for obj in box_objs]
        return img, {"boxes": boxes, "labels": labels}
# Usage
dataset = AvalaDataset("export.json", "./images")

# Detection targets hold variable-length box/label lists per image, so the
# default collate (which tries to stack samples into tensors) would raise.
# Keep each batch as parallel tuples of images and targets instead.
loader = DataLoader(
    dataset,
    batch_size=8,
    shuffle=True,
    collate_fn=lambda batch: tuple(zip(*batch)),
)

for images, targets in loader:
    # Your training loop here
    pass
End-to-End Script
A complete script that exports annotations from Avala, downloads the export file, and starts training:

import json
import time
import requests
from pathlib import Path
from avala import Client
from torch.utils.data import DataLoader
def export_and_train(project_uid: str, images_dir: str):
    """Export annotations for *project_uid* from Avala and run a training loop.

    Assumes the images referenced by the export already exist locally in
    ``images_dir`` — the SDK does not download images (see File Uploads).

    :param project_uid: Avala project identifier (e.g. ``"proj_abc123"``).
    :param images_dir: directory containing the exported images.
    """
    client = Client()

    # 1. Export annotations, polling until the export job finishes.
    export = client.exports.create(project=project_uid)
    while export.status != "completed":
        time.sleep(2)
        export = client.exports.get(export.uid)

    # 2. Download the export. Fail loudly on an HTTP error instead of
    #    silently writing an error page to export.json, and bound the
    #    request so a dead connection cannot hang forever.
    resp = requests.get(export.download_url, timeout=60)
    resp.raise_for_status()
    Path("export.json").write_bytes(resp.content)

    # 3. Create dataset and dataloader. Detection targets vary in length
    #    per image, so keep samples as tuples rather than letting the
    #    default collate try (and fail) to stack them.
    dataset = AvalaDataset("export.json", images_dir)
    loader = DataLoader(
        dataset,
        batch_size=8,
        shuffle=True,
        collate_fn=lambda batch: tuple(zip(*batch)),
    )
    print(f"Training on {len(dataset)} samples")

    # 4. Training loop (replace with your model)
    for epoch in range(10):
        for batch_idx, (images, targets) in enumerate(loader):
            # model(images, targets)
            pass
        print(f"Epoch {epoch + 1} complete")
# Guard the entry point so importing this module does not trigger an export.
if __name__ == "__main__":
    export_and_train("proj_abc123", "./images")
The SDK is currently read-only for datasets. Use the REST API or Mission Control to upload images before exporting annotations. See File Uploads.
Next Steps
- Python SDK reference for all available methods
- Export API for export format details
- Avala + Hugging Face for Hugging Face Datasets integration