def avala_to_hf_dataset(annotations: list, images_dir: str) -> Dataset:
    """Build a Hugging Face ``Dataset`` from a list of Avala export entries.

    Every entry must provide a ``"file_name"`` and may provide an
    ``"annotations"`` list of objects. Only objects whose ``"type"`` equals
    ``"bounding_box"`` are kept; their ``x``/``y``/``width``/``height``
    coordinates are stored as ``[x, y, x + width, y + height]``.

    Args:
        annotations: Export entries (presumably one per image -- confirm
            against the export format).
        images_dir: Prefix placed before each entry's file name, separated
            by a forward slash.

    Returns:
        A dataset with ``image``, ``boxes`` and ``labels`` columns, where the
        ``image`` column has been cast to the ``Image`` feature.
    """
    rows = []
    for entry in annotations:
        corner_boxes = []
        box_labels = []
        for item in entry.get("annotations", []):
            # Anything that is not a bounding box contributes nothing.
            if item["type"] != "bounding_box":
                continue
            coords = item["coordinates"]
            left, top = coords["x"], coords["y"]
            right = left + coords["width"]
            bottom = top + coords["height"]
            corner_boxes.append([left, top, right, bottom])
            # Appended alongside the box so both lists stay index-aligned.
            box_labels.append(item["label"])

        image_path = f"{images_dir}/{entry['file_name']}"
        rows.append(
            {
                "image": image_path,
                "boxes": corner_boxes,
                "labels": box_labels,
            }
        )

    dataset = Dataset.from_list(rows)
    # Casting turns the stored path strings into an Image feature column.
    return dataset.cast_column("image", Image())
# Example usage: convert the export held in `annotations` (defined outside
# this snippet), prefixing each file name with "./images".
dataset = avala_to_hf_dataset(annotations, "./images")
print(dataset)
# Sample output (the row count depends on the export that was loaded):
# Dataset({
#     features: ['image', 'boxes', 'labels'],
#     num_rows: 1200
# })