How to create a Table#
A Table is a collection of samples belonging to a dataset, and is the fundamental data structure in 3LC. Tables are the mechanism through which data is translated and shared between the Dashboard and the Python package.
There are many ways to create a Table. We provide utility functions for common dataset formats such as image folders, PyTorch datasets, COCO, YOLO, and Pandas DataFrames. You can also use a TableWriter
to write a Table from an arbitrary format.
Common formats#
Image folder structure#
If your dataset is in a folder structure as illustrated below, you can use the Table.from_image_folder
method.
Images-folder/
├── class1/
│   ├── image1.jpg
│   ├── image2.jpg
│   └── ...
└── class2/
    ├── image1.jpg
    ├── image2.jpg
    └── ...
import tlc
data_path = "/path/to/images/folder"
table = tlc.Table.from_image_folder(
    root = data_path,
    table_name = "my_table",
    dataset_name = "my_dataset",
    project_name = "my_project",
)
Torchvision formatted datasets#
If your dataset is a PyTorch Dataset, you can use the Table.from_torch_dataset() method.
import tlc
import torchvision
TRANSIENT_DATA_PATH = "/path/to/download/folder"  # placeholder for where CIFAR-10 is downloaded
train_dataset = torchvision.datasets.CIFAR10(root=TRANSIENT_DATA_PATH, train=True, download=True)
val_dataset = torchvision.datasets.CIFAR10(root=TRANSIENT_DATA_PATH, train=False)
class_names = train_dataset.classes
structure = (
    tlc.PILImage("image"),
    tlc.CategoricalLabel("label", classes=class_names),
)
train_table = tlc.Table.from_torch_dataset(
    dataset = train_dataset,
    dataset_name = "train-set",
    table_name = "original",
    project_name = "Cifar-10",
    description = "CIFAR-10 training dataset",
    structure = structure,
    if_exists = "overwrite",
)
val_table = tlc.Table.from_torch_dataset(
    dataset = val_dataset,
    dataset_name = "val-set",
    table_name = "original",
    project_name = "Cifar-10",
    description = "CIFAR-10 validation dataset",
    structure = structure,
    if_exists = "overwrite",
)
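As a minimal usage sketch (assuming the Table supports len() and integer indexing, and that samples mirror the declared (image, label) structure), you can inspect a sample of the resulting Table directly:
print(len(train_table))        # number of samples in the training Table
image, label = train_table[0]  # first sample, mirroring the wrapped dataset
print(class_names[label])      # human-readable class name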
COCO datasets#
If your dataset is in the COCO format, you can use the Table.from_coco
method.
import tlc
IMAGE_PATH = "/path/to/image/folder"
ANNOTATION_PATH = "/path/to/annotation/json/file"
table = tlc.Table.from_coco(
    annotations_file = ANNOTATION_PATH,
    image_folder = IMAGE_PATH,
    table_name = "original",
    dataset_name = "my_dataset",
    project_name = "my_project",
)
YOLO datasets#
If your dataset is in YOLO format, you can use the Table.from_yolo
method.
import tlc
YAML_PATH = "/path/to/data.yaml"
table = tlc.Table.from_yolo(
    dataset_yaml_file = YAML_PATH,
    split = "train",
    table_name = "original",
    dataset_name = "my_dataset",
    project_name = "my_project",
)
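The split argument selects which split from the YAML file to load. A Table for the validation split can be created the same way; the sketch below simply reuses the documented arguments with an example dataset name:
val_table = tlc.Table.from_yolo(
    dataset_yaml_file = YAML_PATH,
    split = "val",
    table_name = "original",
    dataset_name = "my_dataset-val",
    project_name = "my_project",
)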
Pandas dataframe#
If your dataset is a Pandas DataFrame, you can use the Table.from_pandas method.
import pandas as pd
import tlc
df = pd.read_csv("path/to/data.csv")
table = tlc.Table.from_pandas(
    df,
    table_name="original",
    dataset_name="my_dataset",
    project_name="my_project",
)
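A quick sanity check on the result, as a sketch that assumes the Table supports len() and integer indexing and that rows mirror the DataFrame's columns:
assert len(table) == len(df)  # one Table row per DataFrame row
print(table[0])               # expected to be keyed by the DataFrame's column names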
Custom format#
If your dataset is not in any of the common formats, you can create a Table from scratch using a TableWriter.
import tlc

class_names = []      # list of class names
image_path_list = []  # list of image paths (absolute paths)
label_list = []       # list of label names (same length as image_path_list)

table_writer = tlc.TableWriter(
    project_name = "My Project",
    dataset_name = "My Dataset",
    table_name = "My Table",
    column_schemas = {
        "image": tlc.ImagePath("image"),
        "label": tlc.CategoricalLabel("label", classes=class_names),
        "weight": tlc.SampleWeightSchema(),
    },
)

# You can add data row by row ...
for image_path, label_name in zip(image_path_list, label_list):
    label = class_names.index(label_name)
    table_writer.add_row({"image": image_path, "label": label, "weight": 1.0})

# ... or add data in a batch (use one of the two approaches, not both)
label_index_list = [class_names.index(label_name) for label_name in label_list]
weight_list = [1.0] * len(image_path_list)
table_writer.add_batch({"image": image_path_list, "label": label_index_list, "weight": weight_list})

# Finalize the table to write it out
table = table_writer.finalize()
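After finalize(), the Table can be inspected like any other Table. A minimal sanity check, assuming len() and integer indexing are supported as in the examples above:
assert len(table) == len(image_path_list)  # one row per added sample
print(table[0])                            # expected to contain the "image", "label" and "weight" values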
To write a free-style Table for bounding box object detection datasets, see the public example notebook at https://github.com/3lc-ai/3lc-examples/blob/main/tutorials/write-bb-table.ipynb.
Special cases#
Detectron2#
When using 3LC’s Detectron2 integration, you can use the register_coco_instances method to create a Table for COCO datasets, instead of Table.from_coco().
from tlc.integration.detectron2 import register_coco_instances
from detectron2.data import MetadataCatalog, DatasetCatalog
json_file_path = "path/to/json/file"
image_folder_path = "path/to/image/folder"
DATASET_NAME = "my_dataset"
register_coco_instances(
    name = DATASET_NAME,
    project_name = "my_project",
    json_file = json_file_path,
    image_root = image_folder_path,
)
dataset_metadata = MetadataCatalog.get(DATASET_NAME)
dataset_dicts = DatasetCatalog.get(DATASET_NAME)
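The registered dataset can then be referenced by name in a standard Detectron2 config; the snippet below is plain Detectron2 usage rather than part of the 3LC API:
from detectron2.config import get_cfg

cfg = get_cfg()
cfg.DATASETS.TRAIN = (DATASET_NAME,)  # train on the dataset registered above
cfg.DATASETS.TEST = ()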
YOLOv8#
If you are using our ultralytics integration to run YOLOv8 training, Tables will be created automatically when calling TLCYOLO.train.
from ultralytics.utils.tlc.detect.model import TLCYOLO
from ultralytics.utils.tlc.detect.settings import Settings
# Set 3LC specific settings
settings = Settings(
    project_name="my_project",
    run_name="my_run",
)
YAML_FILE_PATH = "path/to/yaml/file"
model = TLCYOLO("yolov8n.pt")
model.train(data=YAML_FILE_PATH, settings=settings, epochs=20)