I am writing a script to calculate performance metrics on an eval dataset for a model I trained in the Roboflow UI, but so far I cannot get my mAR and mAP numbers to match the values the UI shows after training finishes. I am using a YOLOv11 model and dataset, running predictions through a local inference server and computing the metrics with supervision. Does anyone have an idea what is causing the discrepancy? My script prints:
MAP @ 50: 0.4156380482177502
MAR @ 100: 0.2680126076647888
F1score @ 50: 0.7939534061499741
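To double-check that I'm calling the supervision metrics API the way it expects, I put together this minimal, self-contained sanity check with made-up boxes. It is not my real data: the coordinates, the single class, and MetricTarget.BOXES are just for the toy example, and I'm reading the result attributes (map50, map50_95) as they exist in the supervision version I have installed.

import numpy as np
import supervision as sv
from supervision.metrics import MeanAveragePrecision, MetricTarget

# One ground-truth box and one slightly shifted, high-confidence prediction.
target = sv.Detections(
    xyxy=np.array([[10.0, 10.0, 60.0, 60.0]]),
    class_id=np.array([0]),
)
prediction = sv.Detections(
    xyxy=np.array([[12.0, 12.0, 62.0, 62.0]]),
    class_id=np.array([0]),
    confidence=np.array([0.9]),
)

map_metric = MeanAveragePrecision(metric_target=MetricTarget.BOXES)
map_result = map_metric.update([prediction], [target]).compute()
print("map50:   ", map_result.map50)
print("map50_95:", map_result.map50_95)

My full script is below.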
import base64
import sys
from pathlib import Path

import cv2
import requests
import supervision as sv
from supervision.metrics import F1Score, MeanAveragePrecision, MeanAverageRecall, MetricTarget

# MODEL_ID, MODEL_VERSION, and API_KEY are defined here (values omitted).


def predict_image(image_path, visualize=True):
    # Run inference through the local Roboflow inference server.
    url = f"http://localhost:9001/{MODEL_ID}/{MODEL_VERSION}"
    headers = {"Content-Type": "application/json"}
    params = {
        "api_key": API_KEY,
        "confidence": 0.5,
    }
    with open(image_path, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
    image = cv2.imread(image_path)

    response = requests.post(
        url, headers=headers, params=params, data=encoded_image
    ).json()
    detections = sv.Detections.from_inference(response)

    if visualize:
        box_annotator = sv.BoxAnnotator()
        label_annotator = sv.LabelAnnotator()
        labels = [
            f"{class_name} {det_confidence:0.2f}"
            for det_confidence, class_name in zip(
                detections.confidence, detections.data["class_name"]
            )
        ]
        annotated_image = box_annotator.annotate(image, detections=detections)
        annotated_image = label_annotator.annotate(
            annotated_image, detections=detections, labels=labels
        )
        cv2.imshow("Annotated image", annotated_image)
        cv2.waitKey(0)

    return detections

def iter_dataset(path, dataset_dirs=("test", "valid")):
    # Yield (image_path, image, ground-truth detections) for each image in the given splits.
    path = Path(path)
    for ds_dir in dataset_dirs:
        ds_path = path / ds_dir
        ds = sv.DetectionDataset.from_yolo(
            images_directory_path=str(ds_path / "images"),
            annotations_directory_path=str(ds_path / "labels"),
            data_yaml_path=str(path / "data.yaml"),
        )
        for img_path, img_arr, detections in ds:
            yield img_path, img_arr, detections

def perform_prediction(dataset_path, visualize=False, count=None):
    # Collect predictions and ground truth for every image in the eval splits.
    all_predictions = []
    all_targets = []
    current_count = 0
    for idx, (image_file, img_arr, gt_detections) in enumerate(
        iter_dataset(dataset_path)
    ):
        # Optionally stop early after `count` images.
        if count is not None:
            if current_count == count:
                break
            current_count += 1
        detections = predict_image(image_file, visualize=visualize)
        all_predictions.append(detections)
        all_targets.append(gt_detections)
    return all_targets, all_predictions

def calc_metrics(targets, predictions):
    # Calculate precision
    precision_metric = MeanAveragePrecision(metric_target=MetricTarget.MASKS)
    precision_result = precision_metric.update(predictions, targets).compute()
    print(f"MAP @ 50: {precision_result.map50_95}")

    # Calculate recall
    recall_metric = MeanAverageRecall(metric_target=MetricTarget.MASKS)
    recall_result = recall_metric.update(predictions, targets).compute()
    print(f"MAR @ 100: {recall_result.mAR_at_100}")

    # Calculate F1 score
    f1_metric = F1Score(metric_target=MetricTarget.MASKS)
    f1_result = f1_metric.update(predictions, targets).compute()
    print(f"F1score @ 50: {f1_result.f1_50}")

    return precision_result, recall_result, f1_result

if __name__ == "__main__":
    dataset_path = sys.argv[1]
    targets, predictions = perform_prediction(
        dataset_path,
        # count=10,
        # visualize=True,
    )
    precision, recall, f1_score = calc_metrics(targets, predictions)
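One thing I'm wondering about: I pass confidence: 0.5 to the inference server, and as far as I understand mAP/mAR are normally computed across all confidence levels (the whole precision-recall curve), so the UI might be evaluating at a different operating point than my hard 0.5 cutoff. If that could explain the gap, I'd re-run the predictions with something like the sketch below. predict_image_at and the 0.001 default are hypothetical, just for this experiment; it otherwise mirrors predict_image above and uses the same constants and imports.

def predict_image_at(image_path, confidence=0.001):
    # Same request as predict_image, but with the server-side confidence
    # threshold exposed so low-confidence detections aren't filtered out.
    url = f"http://localhost:9001/{MODEL_ID}/{MODEL_VERSION}"
    headers = {"Content-Type": "application/json"}
    params = {"api_key": API_KEY, "confidence": confidence}
    with open(image_path, "rb") as image_file:
        encoded_image = base64.b64encode(image_file.read()).decode("utf-8")
    response = requests.post(
        url, headers=headers, params=params, data=encoded_image
    ).json()
    return sv.Detections.from_inference(response)

Does that sound like a plausible cause, or am I off track?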