I am trying to fine-tune the new SAM 2.1 segmentation model by following the Roboflow guide "How to Fine-Tune SAM-2.1 on a Custom Dataset", running under WSL2 on Windows 11 with VS Code and Python 3.10.
Currently I have this code, which runs automatic segmentation with both my fine-tuned model and the base SAM 2.1 and saves the two annotated images for comparison:
import torch
from sam2.build_sam import build_sam2
from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
import supervision as sv
import os
import random
from PIL import Image
import numpy as np
# use bfloat16 for the entire script (from the Meta notebook)
torch.autocast("cuda", dtype=torch.bfloat16).__enter__()
if torch.cuda.get_device_properties(0).major >= 8:
    # enable TF32 on Ampere (compute capability >= 8.0) and newer GPUs
    torch.backends.cuda.matmul.allow_tf32 = True
    torch.backends.cudnn.allow_tf32 = True
# fine-tuned model
checkpoint = "./sam2_logs/configs/train.yaml/checkpoints/checkpoint.pt"
model_cfg = "./configs/sam2.1/sam2.1_hiera_b+.yaml"
sam2 = build_sam2(model_cfg, checkpoint, device="cuda")
mask_generator = SAM2AutomaticMaskGenerator(sam2)

# base model for comparison
checkpoint_base = "./checkpoints/sam2.1_hiera_base_plus.pt"
model_cfg_base = "./configs/sam2.1/sam2.1_hiera_b+.yaml"
sam2_base = build_sam2(model_cfg_base, checkpoint_base, device="cuda")
mask_generator_base = SAM2AutomaticMaskGenerator(sam2_base)
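# NOTE: SAM2AutomaticMaskGenerator also accepts filtering thresholds at
# construction time; the values below are illustrative, not tuned, e.g.:
# mask_generator = SAM2AutomaticMaskGenerator(
#     sam2,
#     pred_iou_thresh=0.9,          # discard masks the model scores as low quality
#     stability_score_thresh=0.95,  # discard masks unstable under rethresholding
#     min_mask_region_area=500,     # remove tiny regions (requires opencv)
# )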
validation_set = os.listdir("../car_segmentation2-3/valid")
# choose a random image with the .jpg extension
image = random.choice([img for img in validation_set if img.endswith(".jpg")])
image = os.path.join("../car_segmentation2-3/valid", image)
opened_image = np.array(Image.open(image).convert("RGB"))
result = mask_generator.generate(opened_image)
detections = sv.Detections.from_sam(sam_result=result)
# Check what classes we have
for detection in detections:
    print(detection)
    # If the detection belongs to the target class, add it to filtered_detections
    # if detection.class_id == target_class_id:
    #     filtered_detections.append(detection)
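# NOTE: sv.Detections.from_sam does not populate class_id (automatic mask
# generation is class-agnostic), so the class-based filter sketched above has
# nothing to match on; see the filtering options at the end of this post.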
mask_annotator = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)
annotated_image = opened_image.copy()
annotated_image = mask_annotator.annotate(annotated_image, detections=detections)

base_annotator = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)
base_result = mask_generator_base.generate(opened_image)
base_detections = sv.Detections.from_sam(sam_result=base_result)
base_annotated_image = opened_image.copy()
base_annotated_image = base_annotator.annotate(base_annotated_image, detections=base_detections)
#sv.plot_images_grid(images=[annotated_image, base_annotated_image], titles=["Fine-Tuned SAM-2.1", "Base SAM-2.1"], grid_size=(1, 2))
# Save both annotated images as .png files
output_dir = "./output_results"
os.makedirs(output_dir, exist_ok=True)
fine_tuned_output_path = os.path.join(output_dir, "fine_tuned_SAM_2.1.png")
base_output_path = os.path.join(output_dir, "base_SAM_2.1.png")
# Convert the arrays to PIL Images and save them
Image.fromarray(annotated_image).save(fine_tuned_output_path)
Image.fromarray(base_annotated_image).save(base_output_path)
However, I can't figure out how to make the fine-tuned model segment only the regions I want: in the fine-tuned output image, SAM 2.1 is still segmenting (colour-labelling) areas of the image that I don't care about and that weren't included in my training dataset masks.
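For reference, the automatic mask generator is class-agnostic by design: it prompts the whole image with a grid of points and returns every mask it finds, so fine-tuning alone won't stop it from labelling background regions. Below is a minimal sketch of two ways to narrow the output, assuming the standard SAM-2 result keys (predicted_iou, stability_score, area); the thresholds and the box coordinates are hypothetical placeholders, not tuned values.

import numpy as np
from sam2.sam2_image_predictor import SAM2ImagePredictor

# Option 1: post-filter the automatic masks before converting to Detections.
result = mask_generator.generate(opened_image)
filtered = [
    m for m in result
    if m["predicted_iou"] > 0.9        # keep only masks the model scores highly
    and m["stability_score"] > 0.95    # keep only masks stable under rethresholding
    and m["area"] > 1000               # drop small speckle regions
]
detections = sv.Detections.from_sam(sam_result=filtered)

# Option 2: prompt the model explicitly instead of segmenting everything.
# SAM2ImagePredictor returns masks only for the prompted region.
predictor = SAM2ImagePredictor(sam2)
predictor.set_image(opened_image)
box = np.array([100, 100, 400, 300])   # hypothetical xyxy box around the target
masks, scores, _ = predictor.predict(box=box, multimask_output=False)

Option 2 is the usual route when only specific objects matter, since the prompt (a box or points) tells the model which region to segment; Option 1 keeps the automatic pipeline but relies on the fine-tuned model scoring the unwanted masks lower.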