From 42aaf02b218c8cafb5a3279284cdc5ffe0f060ae Mon Sep 17 00:00:00 2001 From: benITo47 Date: Fri, 20 Mar 2026 10:08:10 +0100 Subject: [PATCH 1/8] feat: Add multimethod handling to ObjectDetection --- .../app/object_detection/index.tsx | 2 + .../app/vision_camera/index.tsx | 7 +- .../tasks/ObjectDetectionTask.tsx | 17 +- .../object_detection/ObjectDetection.cpp | 158 ++++++++++----- .../models/object_detection/ObjectDetection.h | 65 +++++- .../src/constants/modelUrls.ts | 47 +++++ .../computer_vision/useObjectDetection.ts | 12 +- .../computer_vision/ObjectDetectionModule.ts | 189 +++++++++++++++++- .../src/types/objectDetection.ts | 85 ++++++-- 9 files changed, 504 insertions(+), 78 deletions(-) diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx index c6ec9f1dc3..399939cfcb 100644 --- a/apps/computer-vision/app/object_detection/index.tsx +++ b/apps/computer-vision/app/object_detection/index.tsx @@ -7,6 +7,7 @@ import { useObjectDetection, RF_DETR_NANO, SSDLITE_320_MOBILENET_V3_LARGE, + YOLO26N, ObjectDetectionModelSources, } from 'react-native-executorch'; import { View, StyleSheet, Image } from 'react-native'; @@ -18,6 +19,7 @@ import ScreenWrapper from '../../ScreenWrapper'; const MODELS: ModelOption[] = [ { label: 'RF-DeTR Nano', value: RF_DETR_NANO }, { label: 'SSDLite MobileNet', value: SSDLITE_320_MOBILENET_V3_LARGE }, + { label: 'YOLO26N', value: YOLO26N }, ]; export default function ObjectDetectionScreen() { diff --git a/apps/computer-vision/app/vision_camera/index.tsx b/apps/computer-vision/app/vision_camera/index.tsx index dbd969ad09..c13e55925c 100644 --- a/apps/computer-vision/app/vision_camera/index.tsx +++ b/apps/computer-vision/app/vision_camera/index.tsx @@ -46,6 +46,7 @@ type ModelId = | 'classification' | 'objectDetectionSsdlite' | 'objectDetectionRfdetr' + | 'objectDetectionYolo26n' | 'segmentationDeeplabResnet50' | 'segmentationDeeplabResnet101' | 'segmentationDeeplabMobilenet' @@ 
-95,6 +96,7 @@ const TASKS: Task[] = [ variants: [ { id: 'objectDetectionSsdlite', label: 'SSDLite MobileNet' }, { id: 'objectDetectionRfdetr', label: 'RF-DETR Nano' }, + { id: 'objectDetectionYolo26n', label: 'YOLO26N' }, ], }, { @@ -241,7 +243,10 @@ export default function VisionCameraScreen() { )} diff --git a/apps/computer-vision/components/vision_camera/tasks/ObjectDetectionTask.tsx b/apps/computer-vision/components/vision_camera/tasks/ObjectDetectionTask.tsx index 0155be7e46..243a3ee09d 100644 --- a/apps/computer-vision/components/vision_camera/tasks/ObjectDetectionTask.tsx +++ b/apps/computer-vision/components/vision_camera/tasks/ObjectDetectionTask.tsx @@ -6,12 +6,16 @@ import { Detection, RF_DETR_NANO, SSDLITE_320_MOBILENET_V3_LARGE, + YOLO26N, useObjectDetection, } from 'react-native-executorch'; import { labelColor, labelColorBg } from '../utils/colors'; import { TaskProps } from './types'; -type ObjModelId = 'objectDetectionSsdlite' | 'objectDetectionRfdetr'; +type ObjModelId = + | 'objectDetectionSsdlite' + | 'objectDetectionRfdetr' + | 'objectDetectionYolo26n'; type Props = TaskProps & { activeModel: ObjModelId }; @@ -34,8 +38,17 @@ export default function ObjectDetectionTask({ model: RF_DETR_NANO, preventLoad: activeModel !== 'objectDetectionRfdetr', }); + const yolo26n = useObjectDetection({ + model: YOLO26N, + preventLoad: activeModel !== 'objectDetectionYolo26n', + }); - const active = activeModel === 'objectDetectionSsdlite' ? ssdlite : rfdetr; + const active = + activeModel === 'objectDetectionSsdlite' + ? ssdlite + : activeModel === 'objectDetectionRfdetr' + ? 
rfdetr + : yolo26n; const [detections, setDetections] = useState([]); const [imageSize, setImageSize] = useState({ width: 1, height: 1 }); diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp index f81d648bb5..eb0943c2f6 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp @@ -1,6 +1,8 @@ #include "ObjectDetection.h" #include "Constants.h" +#include + #include #include #include @@ -18,21 +20,6 @@ ObjectDetection::ObjectDetection( std::shared_ptr callInvoker) : VisionModel(modelSource, callInvoker), labelNames_(std::move(labelNames)) { - auto inputTensors = getAllInputShapes(); - if (inputTensors.empty()) { - throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, - "Model seems to not take any input tensors."); - } - modelInputShape_ = inputTensors[0]; - if (modelInputShape_.size() < 2) { - char errorMessage[100]; - std::snprintf(errorMessage, sizeof(errorMessage), - "Unexpected model input size, expected at least 2 dimensions " - "but got: %zu.", - modelInputShape_.size()); - throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, - errorMessage); - } if (normMean.size() == 3) { normMean_ = cv::Scalar(normMean[0], normMean[1], normMean[2]); } else if (!normMean.empty()) { @@ -47,14 +34,65 @@ ObjectDetection::ObjectDetection( } } +cv::Size ObjectDetection::modelInputSize() const { + if (currentlyLoadedMethod_.empty()) { + return VisionModel::modelInputSize(); + } + auto inputShapes = getAllInputShapes(currentlyLoadedMethod_); + if (inputShapes.empty() || inputShapes[0].size() < 2) { + return VisionModel::modelInputSize(); + } + const auto &shape = inputShapes[0]; + return {static_cast(shape[shape.size() - 2]), + 
static_cast(shape[shape.size() - 1])}; +} + +void ObjectDetection::ensureMethodLoaded(const std::string &methodName) { + if (methodName.empty()) { + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + "methodName cannot be empty"); + } + if (currentlyLoadedMethod_ == methodName) { + return; + } + if (!module_) { + throw RnExecutorchError(RnExecutorchErrorCode::ModuleNotLoaded, + "Model module is not loaded"); + } + if (!currentlyLoadedMethod_.empty()) { + module_->unload_method(currentlyLoadedMethod_); + } + auto loadResult = module_->load_method(methodName); + if (loadResult != executorch::runtime::Error::Ok) { + throw RnExecutorchError( + loadResult, "Failed to load method '" + methodName + + "'. Ensure the method exists in the exported model."); + } + currentlyLoadedMethod_ = methodName; +} + +std::set ObjectDetection::prepareAllowedClasses( + const std::vector &classIndices) const { + std::set allowedClasses; + if (!classIndices.empty()) { + allowedClasses.insert(classIndices.begin(), classIndices.end()); + } + return allowedClasses; +} + std::vector ObjectDetection::postprocess(const std::vector &tensors, - cv::Size originalSize, double detectionThreshold) { + cv::Size originalSize, double detectionThreshold, + double iouThreshold, + const std::vector &classIndices) { const cv::Size inputSize = modelInputSize(); float widthRatio = static_cast(originalSize.width) / inputSize.width; float heightRatio = static_cast(originalSize.height) / inputSize.height; + // Prepare allowed classes set for filtering + auto allowedClasses = prepareAllowedClasses(classIndices); + std::vector detections; auto bboxTensor = tensors.at(0).toTensor(); std::span bboxes( @@ -75,12 +113,21 @@ ObjectDetection::postprocess(const std::vector &tensors, if (scores[i] < detectionThreshold) { continue; } + + auto labelIdx = static_cast(labels[i]); + + // Filter by class if classesOfInterest is specified + if (!allowedClasses.empty() && + allowedClasses.find(labelIdx) == 
allowedClasses.end()) { + continue; + } + float x1 = bboxes[i * 4] * widthRatio; float y1 = bboxes[i * 4 + 1] * heightRatio; float x2 = bboxes[i * 4 + 2] * widthRatio; float y2 = bboxes[i * 4 + 3] * heightRatio; - auto labelIdx = static_cast(labels[i]); - if (labelIdx >= labelNames_.size()) { + + if (static_cast(labelIdx) >= labelNames_.size()) { throw RnExecutorchError( RnExecutorchErrorCode::InvalidConfig, "Model output class index " + std::to_string(labelIdx) + @@ -88,23 +135,40 @@ ObjectDetection::postprocess(const std::vector &tensors, ". Ensure the labelMap covers all model output classes."); } detections.emplace_back(utils::computer_vision::BBox{x1, y1, x2, y2}, - labelNames_[labelIdx], - static_cast(labelIdx), scores[i]); + labelNames_[labelIdx], labelIdx, scores[i]); } - return utils::computer_vision::nonMaxSuppression(detections, - constants::IOU_THRESHOLD); + return utils::computer_vision::nonMaxSuppression(detections, iouThreshold); } -std::vector -ObjectDetection::runInference(cv::Mat image, double detectionThreshold) { +std::vector ObjectDetection::runInference( + cv::Mat image, double detectionThreshold, double iouThreshold, + const std::vector &classIndices, const std::string &methodName) { if (detectionThreshold < 0.0 || detectionThreshold > 1.0) { throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, "detectionThreshold must be in range [0, 1]"); } + if (iouThreshold < 0.0 || iouThreshold > 1.0) { + throw RnExecutorchError(RnExecutorchErrorCode::InvalidUserInput, + "iouThreshold must be in range [0, 1]"); + } + std::scoped_lock lock(inference_mutex_); + // Ensure the correct method is loaded + ensureMethodLoaded(methodName); + cv::Size originalSize = image.size(); + + // Query input shapes for the currently loaded method + auto inputShapes = getAllInputShapes(methodName); + if (inputShapes.empty() || inputShapes[0].size() < 2) { + throw RnExecutorchError(RnExecutorchErrorCode::UnexpectedNumInputs, + "Could not determine input shape for 
method: " + + methodName); + } + modelInputShape_ = inputShapes[0]; + cv::Mat preprocessed = preprocess(image); auto inputTensor = @@ -114,46 +178,50 @@ ObjectDetection::runInference(cv::Mat image, double detectionThreshold) { : image_processing::getTensorFromMatrix(modelInputShape_, preprocessed); - auto forwardResult = BaseModel::forward(inputTensor); - if (!forwardResult.ok()) { - throw RnExecutorchError(forwardResult.error(), - "The model's forward function did not succeed. " - "Ensure the model input is correct."); + auto executeResult = execute(methodName, {inputTensor}); + if (!executeResult.ok()) { + throw RnExecutorchError(executeResult.error(), + "The model's " + methodName + + " method did not succeed. " + "Ensure the model input is correct."); } - return postprocess(forwardResult.get(), originalSize, detectionThreshold); + return postprocess(executeResult.get(), originalSize, detectionThreshold, + iouThreshold, classIndices); } -std::vector -ObjectDetection::generateFromString(std::string imageSource, - double detectionThreshold) { +std::vector ObjectDetection::generateFromString( + std::string imageSource, double detectionThreshold, double iouThreshold, + std::vector classIndices, std::string methodName) { cv::Mat imageBGR = image_processing::readImage(imageSource); cv::Mat imageRGB; cv::cvtColor(imageBGR, imageRGB, cv::COLOR_BGR2RGB); - return runInference(imageRGB, detectionThreshold); + return runInference(imageRGB, detectionThreshold, iouThreshold, classIndices, + methodName); } -std::vector -ObjectDetection::generateFromFrame(jsi::Runtime &runtime, - const jsi::Value &frameData, - double detectionThreshold) { - auto orient = ::rnexecutorch::utils::readFrameOrientation(runtime, frameData); +std::vector ObjectDetection::generateFromFrame( + jsi::Runtime &runtime, const jsi::Value &frameData, + double detectionThreshold, double iouThreshold, + std::vector classIndices, std::string methodName) { cv::Mat frame = extractFromFrame(runtime, frameData); - 
cv::Mat rotated = ::rnexecutorch::utils::rotateFrameForModel(frame, orient); - auto detections = runInference(rotated, detectionThreshold); + auto detections = runInference(frame, detectionThreshold, iouThreshold, + classIndices, methodName); + for (auto &det : detections) { ::rnexecutorch::utils::inverseRotateBbox(det.bbox, orient, rotated.size()); } return detections; } -std::vector -ObjectDetection::generateFromPixels(JSTensorViewIn pixelData, - double detectionThreshold) { +std::vector ObjectDetection::generateFromPixels( + JSTensorViewIn pixelData, double detectionThreshold, double iouThreshold, + std::vector classIndices, std::string methodName) { cv::Mat image = extractFromPixels(pixelData); - return runInference(image, detectionThreshold); + return runInference(image, detectionThreshold, iouThreshold, classIndices, + methodName); } } // namespace rnexecutorch::models::object_detection diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h index 1a7e72a6db..6e3c01356e 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.h @@ -57,6 +57,13 @@ class ObjectDetection : public VisionModel { * @param imageSource URI or file path of the input image. * @param detectionThreshold Minimum confidence score in (0, 1] for a * detection to be included in the output. + * @param iouThreshold IoU threshold for non-maximum suppression. + * @param classIndices Optional list of class indices to filter results. + * Only detections matching these classes will be + * returned. Pass empty vector to include all + * classes. + * @param methodName Name of the method to execute (e.g., "forward", + * "forward_384", "forward_512", "forward_640"). 
* * @return A vector of @ref types::Detection objects with bounding boxes, * label strings (resolved via the label names passed to the @@ -66,16 +73,33 @@ class ObjectDetection : public VisionModel { * fails. */ [[nodiscard("Registered non-void function")]] std::vector - generateFromString(std::string imageSource, double detectionThreshold); + generateFromString(std::string imageSource, double detectionThreshold, + double iouThreshold, std::vector classIndices, + std::string methodName); [[nodiscard("Registered non-void function")]] std::vector generateFromFrame(jsi::Runtime &runtime, const jsi::Value &frameData, - double detectionThreshold); + double detectionThreshold, double iouThreshold, + std::vector classIndices, std::string methodName); [[nodiscard("Registered non-void function")]] std::vector - generateFromPixels(JSTensorViewIn pixelData, double detectionThreshold); + generateFromPixels(JSTensorViewIn pixelData, double detectionThreshold, + double iouThreshold, std::vector classIndices, + std::string methodName); protected: - std::vector runInference(cv::Mat image, - double detectionThreshold); + /** + * @brief Returns the model input size based on the currently loaded method. + * + * Overrides VisionModel::modelInputSize() to support multi-method models + * where each method may have different input dimensions. + * + * @return The expected input size for the currently loaded method. + */ + cv::Size modelInputSize() const override; + + std::vector + runInference(cv::Mat image, double detectionThreshold, double iouThreshold, + const std::vector &classIndices, + const std::string &methodName); private: /** @@ -88,15 +112,37 @@ class ObjectDetection : public VisionModel { * bounding boxes back to input coordinates. * @param detectionThreshold Confidence threshold below which detections * are discarded. + * @param iouThreshold IoU threshold for non-maximum suppression. + * @param classIndices Optional list of class indices to filter results. 
* * @return Non-max-suppressed detections above the threshold. * * @throws RnExecutorchError if the model outputs a class index that exceeds * the size of @ref labelNames_. */ - std::vector postprocess(const std::vector &tensors, - cv::Size originalSize, - double detectionThreshold); + std::vector + postprocess(const std::vector &tensors, cv::Size originalSize, + double detectionThreshold, double iouThreshold, + const std::vector &classIndices); + + /** + * @brief Ensures the specified method is loaded, unloading any previous + * method if necessary. + * + * @param methodName Name of the method to load (e.g., "forward", + * "forward_384"). + * @throws RnExecutorchError if the method cannot be loaded. + */ + void ensureMethodLoaded(const std::string &methodName); + + /** + * @brief Prepares a set of allowed class indices for filtering detections. + * + * @param classIndices Vector of class indices to allow. + * @return A set containing the allowed class indices. + */ + std::set + prepareAllowedClasses(const std::vector &classIndices) const; /// Optional per-channel mean for input normalisation (set in constructor). std::optional normMean_; @@ -106,6 +152,9 @@ class ObjectDetection : public VisionModel { /// Ordered label strings mapping class indices to human-readable names. std::vector labelNames_; + + /// Name of the currently loaded method (for multi-method models). 
+ std::string currentlyLoadedMethod_; }; } // namespace models::object_detection diff --git a/packages/react-native-executorch/src/constants/modelUrls.ts b/packages/react-native-executorch/src/constants/modelUrls.ts index 0e4bcdf080..c173b839cb 100644 --- a/packages/react-native-executorch/src/constants/modelUrls.ts +++ b/packages/react-native-executorch/src/constants/modelUrls.ts @@ -470,6 +470,53 @@ export const RF_DETR_NANO = { modelSource: RF_DETR_NANO_MODEL, } as const; +// YOLO26 Object Detection +const YOLO26N_DETECTION_MODEL = `${URL_PREFIX}-yolo26/${NEXT_VERSION_TAG}/yolo26n/xnnpack/yolo26n.pte`; +const YOLO26S_DETECTION_MODEL = `${URL_PREFIX}-yolo26/${NEXT_VERSION_TAG}/yolo26s/xnnpack/yolo26s.pte`; +const YOLO26M_DETECTION_MODEL = `${URL_PREFIX}-yolo26/${NEXT_VERSION_TAG}/yolo26m/xnnpack/yolo26m.pte`; +const YOLO26L_DETECTION_MODEL = `${URL_PREFIX}-yolo26/${NEXT_VERSION_TAG}/yolo26l/xnnpack/yolo26l.pte`; +const YOLO26X_DETECTION_MODEL = `${URL_PREFIX}-yolo26/${NEXT_VERSION_TAG}/yolo26x/xnnpack/yolo26x.pte`; + +/** + * @category Models - Object Detection + */ +export const YOLO26N = { + modelName: 'yolo26n', + modelSource: YOLO26N_DETECTION_MODEL, +} as const; + +/** + * @category Models - Object Detection + */ +export const YOLO26S = { + modelName: 'yolo26s', + modelSource: YOLO26S_DETECTION_MODEL, +} as const; + +/** + * @category Models - Object Detection + */ +export const YOLO26M = { + modelName: 'yolo26m', + modelSource: YOLO26M_DETECTION_MODEL, +} as const; + +/** + * @category Models - Object Detection + */ +export const YOLO26L = { + modelName: 'yolo26l', + modelSource: YOLO26L_DETECTION_MODEL, +} as const; + +/** + * @category Models - Object Detection + */ +export const YOLO26X = { + modelName: 'yolo26x', + modelSource: YOLO26X_DETECTION_MODEL, +} as const; + // Style transfer const STYLE_TRANSFER_CANDY_MODEL = Platform.OS === `ios` diff --git a/packages/react-native-executorch/src/hooks/computer_vision/useObjectDetection.ts 
b/packages/react-native-executorch/src/hooks/computer_vision/useObjectDetection.ts index c19b819318..cc5d69e42a 100644 --- a/packages/react-native-executorch/src/hooks/computer_vision/useObjectDetection.ts +++ b/packages/react-native-executorch/src/hooks/computer_vision/useObjectDetection.ts @@ -6,6 +6,7 @@ import { ObjectDetectionModelSources, ObjectDetectionProps, ObjectDetectionType, + ObjectDetectionOptions, } from '../../types/objectDetection'; import { PixelData } from '../../types/common'; import { useModuleFactory } from '../useModuleFactory'; @@ -30,6 +31,7 @@ export const useObjectDetection = ({ downloadProgress, runForward, runOnFrame, + instance, } = useModuleFactory({ factory: (config, onProgress) => ObjectDetectionModule.fromModelName(config, onProgress), @@ -38,8 +40,13 @@ export const useObjectDetection = ({ preventLoad, }); - const forward = (input: string | PixelData, detectionThreshold?: number) => - runForward((inst) => inst.forward(input, detectionThreshold)); + const forward = ( + input: string | PixelData, + options?: ObjectDetectionOptions> + ) => runForward((inst) => inst.forward(input, options)); + + const getAvailableInputSizes = () => + instance?.getAvailableInputSizes() ?? 
undefined; return { error, @@ -48,5 +55,6 @@ export const useObjectDetection = ({ downloadProgress, forward, runOnFrame, + getAvailableInputSizes, }; }; diff --git a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts index cbb3847ffa..93d08c0c9c 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts @@ -4,9 +4,13 @@ import { ObjectDetectionConfig, ObjectDetectionModelName, ObjectDetectionModelSources, + ObjectDetectionOptions, } from '../../types/objectDetection'; +import { RnExecutorchErrorCode } from '../../errors/ErrorCodes'; +import { RnExecutorchError } from '../../errors/errorUtils'; import { CocoLabel, + CocoLabelYolo, IMAGENET1K_MEAN, IMAGENET1K_STD, } from '../../constants/commonVision'; @@ -16,15 +20,37 @@ import { VisionLabeledModule, } from './VisionLabeledModule'; +const YOLO_DETECTION_CONFIG = { + labelMap: CocoLabelYolo, + preprocessorConfig: undefined, + availableInputSizes: [384, 512, 640] as const, + defaultInputSize: 384, + defaultDetectionThreshold: 0.5, + defaultIouThreshold: 0.5, +} satisfies ObjectDetectionConfig; + const ModelConfigs = { 'ssdlite-320-mobilenet-v3-large': { labelMap: CocoLabel, preprocessorConfig: undefined, + availableInputSizes: undefined, + defaultInputSize: undefined, + defaultDetectionThreshold: 0.7, + defaultIouThreshold: 0.55, }, 'rf-detr-nano': { labelMap: CocoLabel, preprocessorConfig: { normMean: IMAGENET1K_MEAN, normStd: IMAGENET1K_STD }, + availableInputSizes: undefined, + defaultInputSize: undefined, + defaultDetectionThreshold: 0.7, + defaultIouThreshold: 0.55, }, + 'yolo26n': YOLO_DETECTION_CONFIG, + 'yolo26s': YOLO_DETECTION_CONFIG, + 'yolo26m': YOLO_DETECTION_CONFIG, + 'yolo26l': YOLO_DETECTION_CONFIG, + 'yolo26x': YOLO_DETECTION_CONFIG, } as const 
satisfies Record< ObjectDetectionModelName, ObjectDetectionConfig @@ -55,8 +81,15 @@ type ResolveLabels = export class ObjectDetectionModule< T extends ObjectDetectionModelName | LabelEnum, > extends VisionLabeledModule>[], ResolveLabels> { - private constructor(labelMap: ResolveLabels, nativeModule: unknown) { + private modelConfig: ObjectDetectionConfig; + + private constructor( + labelMap: ResolveLabels, + modelConfig: ObjectDetectionConfig, + nativeModule: unknown + ) { super(labelMap, nativeModule); + this.modelConfig = modelConfig; } /** @@ -70,9 +103,10 @@ export class ObjectDetectionModule< onDownloadProgress: (progress: number) => void = () => {} ): Promise>> { const { modelSource } = namedSources; - const { labelMap, preprocessorConfig } = ModelConfigs[ + const modelConfig = ModelConfigs[ namedSources.modelName ] as ObjectDetectionConfig; + const { labelMap, preprocessorConfig } = modelConfig; const normMean = preprocessorConfig?.normMean ?? []; const normStd = preprocessorConfig?.normStd ?? []; const allLabelNames: string[] = []; @@ -91,21 +125,165 @@ export class ObjectDetectionModule< ); return new ObjectDetectionModule>( labelMap as ResolveLabels>, + modelConfig, nativeModule ); } + /** + * Returns the available input sizes for this model, or undefined if the model accepts any size. + * + * @returns An array of available input sizes, or undefined if not constrained. + * + * @example + * ```typescript + * const sizes = model.getAvailableInputSizes(); // [384, 512, 640] for YOLO models, or undefined for RF-DETR + * ``` + */ + getAvailableInputSizes(): readonly number[] | undefined { + return this.modelConfig.availableInputSizes; + } + + /** + * Override runOnFrame to provide an options-based API for VisionCamera integration. 
+ */ + override get runOnFrame(): + | (( + frame: any, + options?: ObjectDetectionOptions> + ) => Detection>[]) + | null { + const baseRunOnFrame = super.runOnFrame; + if (!baseRunOnFrame) return null; + + // Create reverse map (label → enum value) for classesOfInterest lookup + const labelMap: Record = {}; + for (const [name, value] of Object.entries(this.labelMap)) { + if (typeof value === 'number') { + labelMap[name] = value; + } + } + + const defaultDetectionThreshold = + this.modelConfig.defaultDetectionThreshold ?? 0.7; + const defaultIouThreshold = this.modelConfig.defaultIouThreshold ?? 0.55; + const defaultInputSize = this.modelConfig.defaultInputSize; + + return ( + frame: any, + options?: ObjectDetectionOptions> + ): Detection>[] => { + 'worklet'; + + const detectionThreshold = + options?.detectionThreshold ?? defaultDetectionThreshold; + const iouThreshold = options?.iouThreshold ?? defaultIouThreshold; + const inputSize = options?.inputSize ?? defaultInputSize; + const methodName = + inputSize !== undefined ? `forward_${inputSize}` : 'forward'; + + const classIndices = options?.classesOfInterest + ? options.classesOfInterest.map((label) => { + const labelStr = String(label); + const enumValue = labelMap[labelStr]; + return typeof enumValue === 'number' ? enumValue : -1; + }) + : []; + + return baseRunOnFrame( + frame, + detectionThreshold, + iouThreshold, + classIndices, + methodName + ); + }; + } + /** * Executes the model's forward pass to detect objects within the provided image. + * + * Supports two input types: + * 1. **String path/URI**: File path, URL, or Base64-encoded string + * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) + * * @param input - A string image source (file path, URI, or Base64) or a {@link PixelData} object. - * @param detectionThreshold - Minimum confidence score for a detection to be included. Default is 0.7. + * @param options - Optional configuration for detection inference. 
Includes `detectionThreshold`, `inputSize`, and `classesOfInterest`. * @returns A Promise resolving to an array of {@link Detection} objects. + * @throws {RnExecutorchError} If the model is not loaded or if an invalid `inputSize` is provided. + * + * @example + * ```typescript + * const detections = await model.forward('path/to/image.jpg', { + * detectionThreshold: 0.7, + * inputSize: 640, // For YOLO models + * classesOfInterest: ['PERSON', 'CAR'], + * }); + * ``` */ override async forward( input: string | PixelData, - detectionThreshold = 0.7 + options?: ObjectDetectionOptions> ): Promise>[]> { - return super.forward(input, detectionThreshold); + if (this.nativeModule == null) { + throw new RnExecutorchError( + RnExecutorchErrorCode.ModuleNotLoaded, + 'The model is currently not loaded. Please load the model before calling forward().' + ); + } + + // Extract parameters with defaults from config + const detectionThreshold = + options?.detectionThreshold ?? + this.modelConfig.defaultDetectionThreshold ?? + 0.7; + const iouThreshold = + options?.iouThreshold ?? this.modelConfig.defaultIouThreshold ?? 0.55; + const inputSize = options?.inputSize ?? this.modelConfig.defaultInputSize; + + // Validate inputSize against availableInputSizes + if ( + this.modelConfig.availableInputSizes && + inputSize !== undefined && + !this.modelConfig.availableInputSizes.includes( + inputSize as (typeof this.modelConfig.availableInputSizes)[number] + ) + ) { + throw new RnExecutorchError( + RnExecutorchErrorCode.InvalidArgument, + `Invalid inputSize: ${inputSize}. Available sizes: ${this.modelConfig.availableInputSizes.join(', ')}` + ); + } + + // Construct method name: forward_384, forward_512, forward_640, or forward + const methodName = + inputSize !== undefined ? `forward_${inputSize}` : 'forward'; + + // Convert classesOfInterest to indices + const classIndices = options?.classesOfInterest + ? 
options.classesOfInterest.map((label) => { + const labelStr = String(label); + const enumValue = this.labelMap[labelStr as keyof ResolveLabels]; + return typeof enumValue === 'number' ? enumValue : -1; + }) + : []; + + // Call native with all parameters + return typeof input === 'string' + ? await this.nativeModule.generateFromString( + input, + detectionThreshold, + iouThreshold, + classIndices, + methodName + ) + : await this.nativeModule.generateFromPixels( + input, + detectionThreshold, + iouThreshold, + classIndices, + methodName + ); } /** @@ -159,6 +337,7 @@ export class ObjectDetectionModule< ); return new ObjectDetectionModule( config.labelMap as ResolveLabels, + config, nativeModule ); } diff --git a/packages/react-native-executorch/src/types/objectDetection.ts b/packages/react-native-executorch/src/types/objectDetection.ts index 8e8bf02896..bdb380c0ac 100644 --- a/packages/react-native-executorch/src/types/objectDetection.ts +++ b/packages/react-native-executorch/src/types/objectDetection.ts @@ -32,6 +32,23 @@ export interface Detection { score: number; } +/** + * Options for configuring object detection inference. + * + * @category Types + * @typeParam L - The label enum type for filtering classes of interest. + * @property {number} [detectionThreshold] - Minimum confidence score for detections (0-1). Defaults to model-specific value. + * @property {number} [iouThreshold] - IoU threshold for non-maximum suppression (0-1). Defaults to model-specific value. + * @property {number} [inputSize] - Input size for multi-method models (e.g., 384, 512, 640 for YOLO). Required for YOLO models if not using default. + * @property {(keyof L)[]} [classesOfInterest] - Optional array of class labels to filter detections. Only detections matching these classes will be returned. 
+ */ +export interface ObjectDetectionOptions { + detectionThreshold?: number; + iouThreshold?: number; + inputSize?: number; + classesOfInterest?: (keyof L)[]; +} + /** * Per-model config for {@link ObjectDetectionModule.fromModelName}. * Each model name maps to its required fields. @@ -39,7 +56,12 @@ export interface Detection { */ export type ObjectDetectionModelSources = | { modelName: 'ssdlite-320-mobilenet-v3-large'; modelSource: ResourceSource } - | { modelName: 'rf-detr-nano'; modelSource: ResourceSource }; + | { modelName: 'rf-detr-nano'; modelSource: ResourceSource } + | { modelName: 'yolo26n'; modelSource: ResourceSource } + | { modelName: 'yolo26s'; modelSource: ResourceSource } + | { modelName: 'yolo26m'; modelSource: ResourceSource } + | { modelName: 'yolo26l'; modelSource: ResourceSource } + | { modelName: 'yolo26x'; modelSource: ResourceSource }; /** * Union of all built-in object detection model names. @@ -50,11 +72,29 @@ export type ObjectDetectionModelName = ObjectDetectionModelSources['modelName']; /** * Configuration for a custom object detection model. * @category Types + * @typeParam T - The label enum type for the model. + * @property {T} labelMap - The label mapping for the model. + * @property {object} [preprocessorConfig] - Optional preprocessing configuration with normalization parameters. + * @property {number} [defaultDetectionThreshold] - Default detection confidence threshold (0-1). + * @property {number} [defaultIouThreshold] - Default IoU threshold for non-maximum suppression (0-1). + * @property {readonly number[]} [availableInputSizes] - For multi-method models, the available input sizes (e.g., [384, 512, 640]). + * @property {number} [defaultInputSize] - For multi-method models, the default input size to use. 
*/ export type ObjectDetectionConfig = { labelMap: T; preprocessorConfig?: { normMean?: Triple; normStd?: Triple }; -}; + defaultDetectionThreshold?: number; + defaultIouThreshold?: number; +} & ( + | { + availableInputSizes: readonly number[]; + defaultInputSize: number; + } + | { + availableInputSizes?: undefined; + defaultInputSize?: undefined; + } +); /** * Props for the `useObjectDetection` hook. @@ -98,27 +138,44 @@ export interface ObjectDetectionType { /** * Executes the model's forward pass with automatic input type detection. * @param input - Image source (string path/URI or PixelData object) - * @param detectionThreshold - An optional number between 0 and 1 representing the minimum confidence score. Default is 0.7. + * @param options - Optional configuration for detection inference * @returns A Promise that resolves to an array of `Detection` objects. * @throws {RnExecutorchError} If the model is not loaded or is currently processing another image. * @example * ```typescript - * // String path - * const detections1 = await model.forward('file:///path/to/image.jpg'); + * // String path with options + * const detections1 = await model.forward('file:///path/to/image.jpg', { + * detectionThreshold: 0.7, + * inputSize: 640, // For YOLO models + * classesOfInterest: ['PERSON', 'CAR'] + * }); * * // Pixel data * const detections2 = await model.forward({ * dataPtr: new Uint8Array(rgbPixels), * sizes: [480, 640, 3], * scalarType: ScalarType.BYTE - * }); + * }, { detectionThreshold: 0.5 }); * ``` */ forward: ( input: string | PixelData, - detectionThreshold?: number + options?: ObjectDetectionOptions ) => Promise[]>; + /** + * Returns the available input sizes for multi-method models (e.g., YOLO). + * Returns undefined for single-method models (e.g., RF-DETR, SSDLite). 
+ * + * @returns Array of available input sizes or undefined + * + * @example + * ```typescript + * const sizes = model.getAvailableInputSizes(); // [384, 512, 640] for YOLO models + * ``` + */ + getAvailableInputSizes: () => readonly number[] | undefined; + /** * Synchronous worklet function for real-time VisionCamera frame processing. * Automatically handles native buffer extraction and cleanup. @@ -129,14 +186,12 @@ export interface ObjectDetectionType { * Available after model is loaded (`isReady: true`). * @param frame - VisionCamera Frame object * @param isFrontCamera - Whether the front camera is active, used for mirroring corrections. - * @param detectionThreshold - The threshold for detection sensitivity. + * @param options - Optional configuration for detection inference * @returns Array of Detection objects representing detected items in the frame. */ - runOnFrame: - | (( - frame: Frame, - isFrontCamera: boolean, - detectionThreshold: number - ) => Detection[]) - | null; + runOnFrame: ( + frame: Frame, + isFrontCamera: boolean, + options?: ObjectDetectionOptions + ) => Detection[]; } From 9b77da76a7f93d7851c880c8d000b2448713bea6 Mon Sep 17 00:00:00 2001 From: benITo47 Date: Fri, 20 Mar 2026 10:57:53 +0100 Subject: [PATCH 2/8] Update docs --- docs/docs/02-benchmarks/inference-time.md | 10 +++++ docs/docs/02-benchmarks/memory-usage.md | 10 +++++ docs/docs/02-benchmarks/model-size.md | 6 +++ .../02-computer-vision/useObjectDetection.md | 38 ++++++++++++------- .../ObjectDetectionModule.md | 16 +++++++- 5 files changed, 66 insertions(+), 14 deletions(-) diff --git a/docs/docs/02-benchmarks/inference-time.md b/docs/docs/02-benchmarks/inference-time.md index cec25098b8..fe1c143409 100644 --- a/docs/docs/02-benchmarks/inference-time.md +++ b/docs/docs/02-benchmarks/inference-time.md @@ -43,11 +43,21 @@ processing. Resizing is typically fast for small images but may be noticeably slower for very large images, which can increase total time. 
:::

+:::warning
+Times presented in the tables are measured for the forward method with input size equal to 512. Other input sizes may yield slower or faster inference times.
+:::
+
 | Model / Device                                | iPhone 17 Pro [ms] | Google Pixel 10 [ms] |
 | :-------------------------------------------- | :----------------: | :------------------: |
 | SSDLITE_320_MOBILENET_V3_LARGE (XNNPACK FP32) | 20                 | 18                   |
 | SSDLITE_320_MOBILENET_V3_LARGE (Core ML FP32) | 18                 | -                    |
 | SSDLITE_320_MOBILENET_V3_LARGE (Core ML FP16) | 8                  | -                    |
+| RF_DETR_NANO (XNNPACK FP32)                   | TBD                | TBD                  |
+| YOLO26N (XNNPACK FP32)                        | TBD                | TBD                  |
+| YOLO26S (XNNPACK FP32)                        | TBD                | TBD                  |
+| YOLO26M (XNNPACK FP32)                        | TBD                | TBD                  |
+| YOLO26L (XNNPACK FP32)                        | TBD                | TBD                  |
+| YOLO26X (XNNPACK FP32)                        | TBD                | TBD                  |

 ## Style Transfer

diff --git a/docs/docs/02-benchmarks/memory-usage.md b/docs/docs/02-benchmarks/memory-usage.md
index 0ad6e7a11d..0e1b3ccee8 100644
--- a/docs/docs/02-benchmarks/memory-usage.md
+++ b/docs/docs/02-benchmarks/memory-usage.md
@@ -25,11 +25,21 @@ loaded and actively running inference, relative to the baseline app memory
 before model initialization.
 :::

+:::warning
+Data presented for YOLO models is based on inference with the forward_640 method.
+::: + | Model / Device | iPhone 17 Pro [MB] | Google Pixel 10 [MB] | | --------------------------------------------- | :----------------: | :------------------: | | SSDLITE_320_MOBILENET_V3_LARGE (XNNPACK FP32) | 94 | 104 | | SSDLITE_320_MOBILENET_V3_LARGE (Core ML FP32) | 83 | - | | SSDLITE_320_MOBILENET_V3_LARGE (Core ML FP16) | 62 | - | +| RF_DETR_NANO (XNNPACK FP32) | TBD | TBD | +| YOLO26N (XNNPACK FP32) | TBD | TBD | +| YOLO26S (XNNPACK FP32) | TBD | TBD | +| YOLO26M (XNNPACK FP32) | TBD | TBD | +| YOLO26L (XNNPACK FP32) | TBD | TBD | +| YOLO26X (XNNPACK FP32) | TBD | TBD | ## Style Transfer diff --git a/docs/docs/02-benchmarks/model-size.md b/docs/docs/02-benchmarks/model-size.md index 38ea9e7a6e..f9f5e4701f 100644 --- a/docs/docs/02-benchmarks/model-size.md +++ b/docs/docs/02-benchmarks/model-size.md @@ -13,6 +13,12 @@ title: Model Size | Model | XNNPACK FP32 [MB] | Core ML FP32 [MB] | Core ML FP16 [MB] | | ------------------------------ | :---------------: | :---------------: | :---------------: | | SSDLITE_320_MOBILENET_V3_LARGE | 13.9 | 15.6 | 8.46 | +| RF_DETR_NANO | 112 | - | - | +| YOLO26N | 10.3 | - | - | +| YOLO26S | 38.6 | - | - | +| YOLO26M | 82.3 | - | - | +| YOLO26L | 100 | - | - | +| YOLO26X | 224 | - | - | ## Instance Segmentation diff --git a/docs/docs/03-hooks/02-computer-vision/useObjectDetection.md b/docs/docs/03-hooks/02-computer-vision/useObjectDetection.md index 5fb2b2bb3a..3ede23f48d 100644 --- a/docs/docs/03-hooks/02-computer-vision/useObjectDetection.md +++ b/docs/docs/03-hooks/02-computer-vision/useObjectDetection.md @@ -61,13 +61,18 @@ You need more details? Check the following resources: - `error` - An error object if the model failed to load or encountered a runtime error. - `downloadProgress` - A value between 0 and 1 representing the download progress of the model binary. - `forward` - A function to run inference on an image. 
+- `getAvailableInputSizes` - A function that returns available input sizes for multi-method models (YOLO). Returns `undefined` for single-method models. ## Running the model To run the model, use the [`forward`](../../06-api-reference/interfaces/ObjectDetectionType.md#forward) method. It accepts two arguments: - `input` (required) - The image to process. Can be a remote URL, a local file URI, a base64-encoded image (whole URI or only raw base64), or a [`PixelData`](../../06-api-reference/interfaces/PixelData.md) object (raw RGB pixel buffer). -- `detectionThreshold` (optional) - A number between 0 and 1 representing the minimum confidence score for a detection to be included in the results. Defaults to `0.7`. +- `options` (optional) - An [`ObjectDetectionOptions`](../../06-api-reference/interfaces/ObjectDetectionOptions.md) object with the following properties: + - `detectionThreshold` (optional) - A number between 0 and 1 representing the minimum confidence score. Defaults to model-specific value (typically `0.7`). + - `iouThreshold` (optional) - IoU threshold for non-maximum suppression (0-1). Defaults to model-specific value (typically `0.55`). + - `inputSize` (optional) - For multi-method models like YOLO, specify the input resolution (`384`, `512`, or `640`). Defaults to `384` for YOLO models. + - `classesOfInterest` (optional) - Array of class labels to filter detections. Only detections matching these classes will be returned. 
`forward` returns a promise resolving to an array of [`Detection`](../../06-api-reference/interfaces/Detection.md) objects, each containing: @@ -78,11 +83,11 @@ To run the model, use the [`forward`](../../06-api-reference/interfaces/ObjectDe ## Example ```typescript -import { useObjectDetection, RF_DETR_NANO } from 'react-native-executorch'; +import { useObjectDetection, YOLO26N } from 'react-native-executorch'; function App() { const model = useObjectDetection({ - model: RF_DETR_NANO, + model: YOLO26N, }); const handleDetect = async () => { @@ -91,13 +96,12 @@ function App() { const imageUri = 'file:///Users/.../photo.jpg'; try { - const detections = await model.forward(imageUri, 0.5); + const detections = await model.forward(imageUri, { + detectionThreshold: 0.5, + inputSize: 640, + }); - for (const detection of detections) { - console.log('Label:', detection.label); - console.log('Score:', detection.score); - console.log('Bounding box:', detection.bbox); - } + console.log('Detected:', detections.length, 'objects'); } catch (error) { console.error(error); } @@ -113,7 +117,15 @@ See the full guide: [VisionCamera Integration](./visioncamera-integration.md). 
 ## Supported models

-| Model | Number of classes | Class list |
-| ----------------------------------------------------------------------------------------------------------------------------- | ----------------- | -------------------------------------------------------- |
-| [SSDLite320 MobileNetV3 Large](https://huggingface.co/software-mansion/react-native-executorch-ssdlite320-mobilenet-v3-large) | 91 | [COCO](../../06-api-reference/enumerations/CocoLabel.md) |
-| [RF-DETR Nano](https://huggingface.co/software-mansion/react-native-executorch-rf-detr-nano) | 80 | [COCO](../../06-api-reference/enumerations/CocoLabel.md) |
+| Model | Number of classes | Class list | Multi-size Support |
+| ----------------------------------------------------------------------------------------------------------------------------- | ----------------- | ------------------------------------------------------------ | ------------------ |
+| [SSDLite320 MobileNetV3 Large](https://huggingface.co/software-mansion/react-native-executorch-ssdlite320-mobilenet-v3-large) | 91 | [COCO](../../06-api-reference/enumerations/CocoLabel.md) | No |
+| [RF-DETR Nano](https://huggingface.co/software-mansion/react-native-executorch-rf-detr-nano) | 80 | [COCO](../../06-api-reference/enumerations/CocoLabel.md) | No |
+| [YOLO26N](https://huggingface.co/software-mansion/react-native-executorch-yolo26) | 80 | [COCO YOLO](../../06-api-reference/enumerations/CocoLabel.md) | Yes (384/512/640) |
+| [YOLO26S](https://huggingface.co/software-mansion/react-native-executorch-yolo26) | 80 | [COCO YOLO](../../06-api-reference/enumerations/CocoLabel.md) | Yes (384/512/640) |
+| [YOLO26M](https://huggingface.co/software-mansion/react-native-executorch-yolo26) | 80 | [COCO YOLO](../../06-api-reference/enumerations/CocoLabel.md) | Yes (384/512/640) |
+| [YOLO26L](https://huggingface.co/software-mansion/react-native-executorch-yolo26) | 80 | [COCO YOLO](../../06-api-reference/enumerations/CocoLabel.md) | Yes (384/512/640
| +| [YOLO26X](https://huggingface.co/software-mansion/react-native-executorch-yolo26) | 80 | [COCO YOLO](../../06-api-reference/enumerations/CocoLabel.md) | Yes (384/512/640) | + +:::tip +YOLO models support multiple input sizes (384px, 512px, 640px). Smaller sizes are faster but less accurate, while larger sizes are more accurate but slower. Choose based on your speed/accuracy requirements. +::: diff --git a/docs/docs/04-typescript-api/02-computer-vision/ObjectDetectionModule.md b/docs/docs/04-typescript-api/02-computer-vision/ObjectDetectionModule.md index b56cb47713..0d004e6752 100644 --- a/docs/docs/04-typescript-api/02-computer-vision/ObjectDetectionModule.md +++ b/docs/docs/04-typescript-api/02-computer-vision/ObjectDetectionModule.md @@ -43,12 +43,26 @@ For more information on loading resources, take a look at [loading models](../.. To run the model, use the [`forward`](../../06-api-reference/classes/ObjectDetectionModule.md#forward) method. It accepts two arguments: - `input` (required) - The image to process. Can be a remote URL, a local file URI, a base64-encoded image (whole URI or only raw base64), or a [`PixelData`](../../06-api-reference/interfaces/PixelData.md) object (raw RGB pixel buffer). -- `detectionThreshold` (optional) - A number between 0 and 1. Defaults to `0.7`. +- `options` (optional) - An [`ObjectDetectionOptions`](../../06-api-reference/interfaces/ObjectDetectionOptions.md) object with: + - `detectionThreshold` (optional) - Minimum confidence score (0-1). Defaults to model-specific value. + - `iouThreshold` (optional) - IoU threshold for NMS (0-1). Defaults to model-specific value. + - `inputSize` (optional) - For YOLO models: `384`, `512`, or `640`. Defaults to `384`. + - `classesOfInterest` (optional) - Array of class labels to filter detections. The method returns a promise resolving to an array of [`Detection`](../../06-api-reference/interfaces/Detection.md) objects, each containing the bounding box, label, and confidence score. 
For real-time frame processing, use [`runOnFrame`](../../03-hooks/02-computer-vision/visioncamera-integration.md) instead. +### Example with Options + +```typescript +const detections = await model.forward(imageUri, { + detectionThreshold: 0.5, + inputSize: 640, // YOLO models only + classesOfInterest: ['PERSON', 'CAR'], +}); +``` + ## Using a custom model Use [`fromCustomModel`](../../06-api-reference/classes/ObjectDetectionModule.md#fromcustommodel) to load your own exported model binary instead of a built-in preset. From 9ac45d989ae73af65fc1fab6e5b0ba40aa7f2607 Mon Sep 17 00:00:00 2001 From: benITo47 Date: Fri, 20 Mar 2026 14:56:51 +0100 Subject: [PATCH 3/8] Fix type signatures after rebase --- .../tasks/ObjectDetectionTask.tsx | 25 ++++++++++++++++--- .../computer_vision/ObjectDetectionModule.ts | 8 +++--- .../src/types/objectDetection.ts | 15 ++++++----- 3 files changed, 32 insertions(+), 16 deletions(-) diff --git a/apps/computer-vision/components/vision_camera/tasks/ObjectDetectionTask.tsx b/apps/computer-vision/components/vision_camera/tasks/ObjectDetectionTask.tsx index 243a3ee09d..e77c959075 100644 --- a/apps/computer-vision/components/vision_camera/tasks/ObjectDetectionTask.tsx +++ b/apps/computer-vision/components/vision_camera/tasks/ObjectDetectionTask.tsx @@ -8,6 +8,8 @@ import { SSDLITE_320_MOBILENET_V3_LARGE, YOLO26N, useObjectDetection, + CocoLabel, + CocoLabelYolo, } from 'react-native-executorch'; import { labelColor, labelColorBg } from '../utils/colors'; import { TaskProps } from './types'; @@ -50,7 +52,9 @@ export default function ObjectDetectionTask({ ? 
rfdetr : yolo26n; - const [detections, setDetections] = useState([]); + type CommonDetection = Omit & { label: string }; + + const [detections, setDetections] = useState([]); const [imageSize, setImageSize] = useState({ width: 1, height: 1 }); const lastFrameTimeRef = useRef(Date.now()); @@ -69,8 +73,19 @@ export default function ObjectDetectionTask({ const detRof = active.runOnFrame; const updateDetections = useCallback( - (p: { results: Detection[]; imageWidth: number; imageHeight: number }) => { - setDetections(p.results); + (p: { + results: + | Detection[] + | Detection[]; + imageWidth: number; + imageHeight: number; + }) => { + setDetections( + p.results.map((det) => ({ + ...det, + label: String(det.label), + })) + ); setImageSize({ width: p.imageWidth, height: p.imageHeight }); const now = Date.now(); const diff = now - lastFrameTimeRef.current; @@ -95,7 +110,9 @@ export default function ObjectDetectionTask({ try { if (!detRof) return; const isFrontCamera = cameraPositionSync.getDirty() === 'front'; - const result = detRof(frame, isFrontCamera, 0.5); + const result = detRof(frame, isFrontCamera, { + detectionThreshold: 0.5, + }); // Sensor frames are landscape-native, so width/height are swapped // relative to portrait screen orientation. const screenW = frame.height; diff --git a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts index 93d08c0c9c..99d0fa7c5d 100644 --- a/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts +++ b/packages/react-native-executorch/src/modules/computer_vision/ObjectDetectionModule.ts @@ -132,9 +132,7 @@ export class ObjectDetectionModule< /** * Returns the available input sizes for this model, or undefined if the model accepts any size. - * * @returns An array of available input sizes, or undefined if not constrained. 
- * * @example * ```typescript * const sizes = model.getAvailableInputSizes(); // [384, 512, 640] for YOLO models, or undefined for RF-DETR @@ -146,10 +144,12 @@ export class ObjectDetectionModule< /** * Override runOnFrame to provide an options-based API for VisionCamera integration. + * @returns A worklet function for frame processing, or null if the model is not loaded. */ override get runOnFrame(): | (( frame: any, + isFrontCamera: boolean, options?: ObjectDetectionOptions> ) => Detection>[]) | null { @@ -171,6 +171,7 @@ export class ObjectDetectionModule< return ( frame: any, + isFrontCamera: boolean, options?: ObjectDetectionOptions> ): Detection>[] => { 'worklet'; @@ -192,6 +193,7 @@ export class ObjectDetectionModule< return baseRunOnFrame( frame, + isFrontCamera, detectionThreshold, iouThreshold, classIndices, @@ -206,12 +208,10 @@ export class ObjectDetectionModule< * Supports two input types: * 1. **String path/URI**: File path, URL, or Base64-encoded string * 2. **PixelData**: Raw pixel data from image libraries (e.g., NitroImage) - * * @param input - A string image source (file path, URI, or Base64) or a {@link PixelData} object. * @param options - Optional configuration for detection inference. Includes `detectionThreshold`, `inputSize`, and `classesOfInterest`. * @returns A Promise resolving to an array of {@link Detection} objects. * @throws {RnExecutorchError} If the model is not loaded or if an invalid `inputSize` is provided. - * * @example * ```typescript * const detections = await model.forward('path/to/image.jpg', { diff --git a/packages/react-native-executorch/src/types/objectDetection.ts b/packages/react-native-executorch/src/types/objectDetection.ts index bdb380c0ac..ca33697d93 100644 --- a/packages/react-native-executorch/src/types/objectDetection.ts +++ b/packages/react-native-executorch/src/types/objectDetection.ts @@ -34,7 +34,6 @@ export interface Detection { /** * Options for configuring object detection inference. 
- *
  * @category Types
  * @typeParam L - The label enum type for filtering classes of interest.
  * @property {number} [detectionThreshold] - Minimum confidence score for detections (0-1). Defaults to model-specific value.
@@ -166,9 +165,7 @@ export interface ObjectDetectionType {
   /**
    * Returns the available input sizes for multi-method models (e.g., YOLO).
    * Returns undefined for single-method models (e.g., RF-DETR, SSDLite).
-   *
    * @returns Array of available input sizes or undefined
-   *
    * @example
    * ```typescript
    * const sizes = model.getAvailableInputSizes(); // [384, 512, 640] for YOLO models
@@ -189,9 +186,11 @@ export interface ObjectDetectionType {
    * @param options - Optional configuration for detection inference
    * @returns Array of Detection objects representing detected items in the frame.
    */
-  runOnFrame: (
-    frame: Frame,
-    isFrontCamera: boolean,
-    options?: ObjectDetectionOptions
-  ) => Detection[];
+  runOnFrame:
+    | ((
+        frame: Frame,
+        isFrontCamera: boolean,
+        options?: ObjectDetectionOptions
+      ) => Detection[])
+    | null;
 }

From 4e64116d4a610d88df352e55fb4e15174cdbb0ed Mon Sep 17 00:00:00 2001
From: benITo47
Date: Fri, 20 Mar 2026 15:27:22 +0100
Subject: [PATCH 4/8] Fix improper rebase merge

---
 .../rnexecutorch/models/object_detection/ObjectDetection.cpp | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp
index eb0943c2f6..be1eb539a2 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp
@@ -206,8 +206,10 @@ std::vector ObjectDetection::generateFromFrame(
     jsi::Runtime &runtime, const jsi::Value &frameData, double detectionThreshold,
     double iouThreshold, std::vector classIndices,
std::string methodName) { + auto orient = ::rnexecutorch::utils::readFrameOrientation(runtime, frameData); cv::Mat frame = extractFromFrame(runtime, frameData); - auto detections = runInference(frame, detectionThreshold, iouThreshold, + cv::Mat rotated = ::rnexecutorch::utils::rotateFrameForModel(frame, orient); + auto detections = runInference(rotated, detectionThreshold, iouThreshold, classIndices, methodName); for (auto &det : detections) { From f327983ab95dfe56f2e2e21371c793e5e1a65bed Mon Sep 17 00:00:00 2001 From: Mateusz Sluszniak <56299341+msluszniak@users.noreply.github.com> Date: Mon, 23 Mar 2026 10:44:28 +0100 Subject: [PATCH 5/8] Update packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp --- .../rnexecutorch/models/object_detection/ObjectDetection.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp index be1eb539a2..69dc60fc4e 100644 --- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp +++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp @@ -127,7 +127,7 @@ ObjectDetection::postprocess(const std::vector &tensors, float x2 = bboxes[i * 4 + 2] * widthRatio; float y2 = bboxes[i * 4 + 3] * heightRatio; - if (static_cast(labelIdx) >= labelNames_.size()) { + if (std::cmp_greater(labelIdx, labelNames_.size()) { throw RnExecutorchError( RnExecutorchErrorCode::InvalidConfig, "Model output class index " + std::to_string(labelIdx) + From 256953e0d526ba6861872517eaafbc39622a851f Mon Sep 17 00:00:00 2001 From: Mateusz Sluszniak <56299341+msluszniak@users.noreply.github.com> Date: Mon, 23 Mar 2026 10:47:10 +0100 Subject: [PATCH 6/8] Update 
 packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp
---
 .../rnexecutorch/models/object_detection/ObjectDetection.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp
index 69dc60fc4e..d004148907 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp
@@ -127,7 +127,7 @@ ObjectDetection::postprocess(const std::vector &tensors,
     float x2 = bboxes[i * 4 + 2] * widthRatio;
     float y2 = bboxes[i * 4 + 3] * heightRatio;

-    if (std::cmp_greater(labelIdx, labelNames_.size()) {
+    if (std::cmp_greater_equal(labelIdx, labelNames_.size()) {
       throw RnExecutorchError(
           RnExecutorchErrorCode::InvalidConfig,
           "Model output class index " + std::to_string(labelIdx) +

From 03fcc836bfe3a8e9070c2797babf9d70c06bcaf6 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Mateusz=20S=C5=82uszniak?=
Date: Mon, 23 Mar 2026 10:50:21 +0100
Subject: [PATCH 7/8] chore close brackets

---
 .../rnexecutorch/models/object_detection/ObjectDetection.cpp | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp
index d004148907..3013755095 100644
--- a/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp
+++ b/packages/react-native-executorch/common/rnexecutorch/models/object_detection/ObjectDetection.cpp
@@ -127,7 +127,7 @@ ObjectDetection::postprocess(const std::vector &tensors,
     float x2 = bboxes[i * 4 + 2] * widthRatio;
     float y2 = bboxes[i * 4 + 3] * heightRatio;

-    if
(std::cmp_greater_equal(labelIdx, labelNames_.size()) { + if (std::cmp_greater_equal(labelIdx, labelNames_.size())) { throw RnExecutorchError( RnExecutorchErrorCode::InvalidConfig, "Model output class index " + std::to_string(labelIdx) + From f83b8adb39086aa64cca2eb73e4953da84822532 Mon Sep 17 00:00:00 2001 From: Mateusz Sluszniak <56299341+msluszniak@users.noreply.github.com> Date: Mon, 23 Mar 2026 11:04:34 +0100 Subject: [PATCH 8/8] Apply suggestions from code review Co-authored-by: Mateusz Sluszniak <56299341+msluszniak@users.noreply.github.com> --- apps/computer-vision/app/object_detection/index.tsx | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/apps/computer-vision/app/object_detection/index.tsx b/apps/computer-vision/app/object_detection/index.tsx index 399939cfcb..28a49dfdf1 100644 --- a/apps/computer-vision/app/object_detection/index.tsx +++ b/apps/computer-vision/app/object_detection/index.tsx @@ -8,6 +8,10 @@ import { RF_DETR_NANO, SSDLITE_320_MOBILENET_V3_LARGE, YOLO26N, + YOLO26S, + YOLO26M, + YOLO26L, + YOLO26X, ObjectDetectionModelSources, } from 'react-native-executorch'; import { View, StyleSheet, Image } from 'react-native'; @@ -20,6 +24,10 @@ const MODELS: ModelOption[] = [ { label: 'RF-DeTR Nano', value: RF_DETR_NANO }, { label: 'SSDLite MobileNet', value: SSDLITE_320_MOBILENET_V3_LARGE }, { label: 'YOLO26N', value: YOLO26N }, + { label: 'YOLO26S', value: YOLO26S }, + { label: 'YOLO26M', value: YOLO26M }, + { label: 'YOLO26L', value: YOLO26L }, + { label: 'YOLO26X', value: YOLO26X }, ]; export default function ObjectDetectionScreen() {