How to segment desired areas in SAM 2.1 segmentation

I am trying to fine-tune the new SAM 2.1 segmentation model by following the Roboflow guide "How to Fine-Tune SAM-2.1 on a Custom Dataset". I am working in WSL2 on Windows 11, using VS Code and Python 3.10.

Currently I have this code, which runs segmentation with both my fine-tuned model and the base SAM 2.1 model and produces two annotated images for comparison.

import torch
from sam2.build_sam import build_sam2
from sam2.automatic_mask_generator import SAM2AutomaticMaskGenerator
import supervision as sv
import os
import random
from PIL import Image
import numpy as np

# Run the rest of the script under CUDA autocast with bfloat16 (pattern taken
# from the Meta notebook). The context is entered by hand and never exited,
# so it stays active for everything that follows.
autocast_ctx = torch.autocast("cuda", dtype=torch.bfloat16)
autocast_ctx.__enter__()

# Allow TF32 for CUDA matmul and cuDNN when device 0 reports a compute
# capability major version of 8 or higher.
if torch.cuda.get_device_properties(0).major >= 8:
    for backend in (torch.backends.cuda.matmul, torch.backends.cudnn):
        backend.allow_tf32 = True

def _load_mask_generator(cfg_path, ckpt_path):
    """Build a SAM 2 model on CUDA and wrap it in an automatic mask generator.

    Returns the ``(model, mask_generator)`` pair.
    """
    model = build_sam2(cfg_path, ckpt_path, device="cuda")
    return model, SAM2AutomaticMaskGenerator(model)


# Model under evaluation: checkpoint written by the training run.
checkpoint = "./sam2_logs/configs/train.yaml/checkpoints/checkpoint.pt"
model_cfg = "./configs/sam2.1/sam2.1_hiera_b+.yaml"
sam2, mask_generator = _load_mask_generator(model_cfg, checkpoint)

# Reference model: the base-plus checkpoint, loaded with the same config file.
checkpoint_base = "./checkpoints/sam2.1_hiera_base_plus.pt"
model_cfg_base = "./configs/sam2.1/sam2.1_hiera_b+.yaml"
sam2_base, mask_generator_base = _load_mask_generator(model_cfg_base, checkpoint_base)

# Validation split directory; kept in one place so the listing and the
# final image path cannot drift apart.
valid_dir = "../car_segmentation2-3/valid"
validation_set = os.listdir(valid_dir)

# Choose one random validation image. Only .jpg files are candidates; any
# other files in the directory are ignored.
jpg_names = [name for name in validation_set if name.endswith(".jpg")]
if not jpg_names:
    # Fail with a clear message instead of the bare IndexError that
    # random.choice raises on an empty list.
    raise FileNotFoundError(f"No .jpg images found in {valid_dir!r}")
image = os.path.join(valid_dir, random.choice(jpg_names))

# Load the image as an RGB array; the context manager closes the file handle
# once the pixel data has been copied into the array.
with Image.open(image) as img:
    opened_image = np.array(img.convert("RGB"))

# Run the fine-tuned model's automatic mask generator on the image.
result = mask_generator.generate(opened_image)

# Wrap the raw generator output in a supervision Detections object.
detections = sv.Detections.from_sam(sam_result=result)

# Print every detection to inspect what the fine-tuned model produced.
for detection in detections:
    print(detection)
    # Disabled class filter, kept for reference.
    # NOTE(review): `target_class_id` and `filtered_detections` are not
    # defined anywhere in the visible script, so this cannot be re-enabled
    # as-is. It also presumes each detection exposes a `class_id` attribute
    # - confirm against what the loop above prints.
    # if detection.class_id == target_class_id:
    #     filtered_detections.append(detection)

# One annotator per model, both colouring masks by detection index.
mask_annotator = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)
base_annotator = sv.MaskAnnotator(color_lookup=sv.ColorLookup.INDEX)

# Fine-tuned model: draw its masks on a copy so the source image stays clean.
annotated_image = mask_annotator.annotate(opened_image.copy(), detections=detections)

# Base model: segment the same image, then draw its masks on a separate copy.
base_result = mask_generator_base.generate(opened_image)
base_detections = sv.Detections.from_sam(sam_result=base_result)
base_annotated_image = base_annotator.annotate(
    opened_image.copy(), detections=base_detections
)

# Side-by-side preview, left disabled; the PNG files written below are the
# script's output.
#sv.plot_images_grid(images=[annotated_image, base_annotated_image], titles=["Fine-Tuned SAM-2.1", "Base SAM-2.1"], grid_size=(1, 2))

# Directory that receives the two comparison images; created on first run.
output_dir = "./output_results"
os.makedirs(output_dir, exist_ok=True)

fine_tuned_output_path = os.path.join(output_dir, "fine_tuned_SAM_2.1.png")
base_output_path = os.path.join(output_dir, "base_SAM_2.1.png")

# Write both annotated arrays as PNG files. The save calls were previously
# commented out, so the directory and paths above were prepared but unused.
# NOTE(review): assumes the annotator returns uint8 arrays in the same RGB
# channel order as the input image - confirm if the saved colours look wrong.
Image.fromarray(annotated_image).save(fine_tuned_output_path)
Image.fromarray(base_annotated_image).save(base_output_path)

However, I can’t figure out how to segment only the regions I want with the fine-tuned model. Currently, in the fine-tuned output image, SAM 2.1 still segments (colour-labels) areas of the image that I don’t care about and that are not included in my training dataset masks.

You might need to edit the training YAML file to train for more epochs or to use a larger learning rate.

If that still doesn’t work, you might try our instance-segmentation or semantic-segmentation offerings

Hey, thanks for the response. I’m just confused: how would more epochs or a larger learning rate help segment only the areas in the training masks?

If it is still segmenting parts of the image you don’t want segmented, it sounds like the model hasn’t fully learned the task you’ve given it. More training data may help here as well.