Documentation Index
Fetch the complete documentation index at: https://avala.ai/docs/llms.txt
Use this file to discover all available pages before exploring further.
This guide shows how to export annotations from Avala and load them into a Hugging Face Dataset for fine-tuning or evaluation.
Prerequisites
# requests downloads the export file; transformers is needed for the Trainer example
# (the Trainer additionally requires a deep-learning backend such as PyTorch).
pip install avala datasets pillow requests transformers
Export and Load
from avala import Client
from datasets import Dataset, Features, Value, Sequence, Image
import json
import time
client = Client()  # reads AVALA_API_KEY

# Export annotations
export = client.exports.create(project="proj_abc123")

# Poll until the export reports "completed", but stop after a deadline so an
# export that fails or gets stuck cannot hang the script forever.
export_deadline = time.monotonic() + 600  # seconds
while export.status != "completed":
    if time.monotonic() > export_deadline:
        raise TimeoutError(
            f"Export {export.uid} did not complete in time (last status: {export.status!r})"
        )
    time.sleep(2)
    export = client.exports.get(export.uid)

# Download the export
import requests

resp = requests.get(export.download_url, timeout=60)
resp.raise_for_status()  # fail loudly instead of parsing an HTTP error body as JSON
annotations = resp.json()
Convert to Hugging Face Dataset
def avala_to_hf_dataset(annotations: list, images_dir: str) -> Dataset:
    """Build a Hugging Face Dataset of images with bounding boxes from an Avala export.

    Args:
        annotations: Export entries. Each entry is read for ``file_name`` and an
            optional ``annotations`` list of annotation objects.
        images_dir: Directory prefix placed in front of each entry's ``file_name``.

    Returns:
        A Dataset with ``image``, ``boxes`` and ``labels`` columns, with the
        ``image`` column cast to the ``Image`` feature.
    """
    rows = []
    for entry in annotations:
        corner_boxes = []
        class_names = []
        for shape in entry.get("annotations", []):
            if shape["type"] != "bounding_box":
                continue  # only bounding boxes contribute to this dataset
            rect = shape["coordinates"]
            left, top = rect["x"], rect["y"]
            # Store each box as [x, y, x + width, y + height].
            corner_boxes.append([left, top, left + rect["width"], top + rect["height"]])
            class_names.append(shape["label"])
        rows.append(
            {
                "image": f"{images_dir}/{entry['file_name']}",
                "boxes": corner_boxes,
                "labels": class_names,
            }
        )
    hf_dataset = Dataset.from_list(rows)
    return hf_dataset.cast_column("image", Image())


dataset = avala_to_hf_dataset(annotations, images_dir="./images")
print(dataset)
# Dataset({
#     features: ['image', 'boxes', 'labels'],
#     num_rows: 1200
# })
Use with a Hugging Face Trainer
from transformers import AutoModelForObjectDetection, TrainingArguments, Trainer

# Distinct class names present in the dataset, in a stable (sorted) order.
# The model head is sized from this list.
label_names = sorted({name for row_labels in dataset["labels"] for name in row_labels})

model = AutoModelForObjectDetection.from_pretrained(
    "facebook/detr-resnet-50",
    num_labels=len(label_names),
    ignore_mismatched_sizes=True,
)

training_args = TrainingArguments(
    output_dir="./results",
    num_train_epochs=10,
    per_device_train_batch_size=4,
    learning_rate=1e-5,
)

# NOTE(review): `dataset` is handed to the Trainer exactly as built above;
# confirm whether the model needs an image-processor / collate step first.
trainer = Trainer(
    model=model,
    args=training_args,
    train_dataset=dataset,
)
trainer.train()
Classification Datasets
For image classification projects, the conversion is simpler:
def avala_classification_to_hf(annotations: list, images_dir: str) -> Dataset:
    """Convert classification annotations to a Hugging Face Dataset.

    Args:
        annotations: Export entries. Each entry is read for ``file_name`` and an
            optional ``annotations`` list whose first object supplies the label.
        images_dir: Directory prefix placed in front of each entry's ``file_name``.

    Returns:
        A Dataset with ``image`` and ``label`` columns, with the ``image``
        column cast to the ``Image`` feature. Entries without any annotation
        object, or whose first object has no ``label``, get the label
        ``"unknown"``.
    """
    records = []
    for ann in annotations:
        # `or [{}]` covers a missing key, a None value, and an empty list alike;
        # a plain .get() default would still hit IndexError on an empty list.
        objects = ann.get("annotations") or [{}]
        label = objects[0].get("label", "unknown")
        records.append({
            "image": f"{images_dir}/{ann['file_name']}",
            "label": label,
        })
    return Dataset.from_list(records).cast_column("image", Image())


dataset = avala_classification_to_hf(annotations, "./images")

# Split into train/test
split = dataset.train_test_split(test_size=0.2, seed=42)
train_dataset = split["train"]
test_dataset = split["test"]
Push to Hugging Face Hub
Share your annotated dataset with your team or the community:
# Replace "your-org/annotated-dataset" with your own repo id. private=True is
# passed here — presumably this keeps the repo private; confirm in the
# push_to_hub documentation.
dataset.push_to_hub("your-org/annotated-dataset", private=True)
The SDK is currently read-only for datasets. Use the REST API or Mission Control to upload images before exporting annotations. See File Uploads.
Next Steps