biigle · mwuel · Jan 29, 2026 · Jan 30, 2026
diff --git a/.docker/gpu-worker.dockerfile b/.docker/gpu-worker.dockerfile
@@ -26,13 +26,30 @@ RUN apt-get update \
         build-essential \
         git \
         libvips \
+        wget \
+    && wget -qO /tmp/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb \
+    && dpkg -i /tmp/cuda-keyring.deb \
+    && apt-get update \
+    && apt-get install -y --no-install-recommends \
+        cuda-nvcc-11-8 \
+        libcusparse-dev-11-8 \
+        libcublas-dev-11-8 \
+        libcusolver-dev-11-8 \
+    && export CUDA_HOME=/usr/local/cuda \
+    && export TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;8.9" \
     && pip3 install --no-cache-dir -r /tmp/requirements.txt \
+    && pip3 install --no-cache-dir --no-build-isolation git+https://github.com/Gy920/segment-anything-2-real-time \
     # Use --no-dependencies so torch is not installed again.
     # Uncomment this if you have an actual GPU.
     # && pip3 install --no-dependencies --index-url https://download.pytorch.org/whl/cu118 xformers==0.0.23 \
     && apt-get purge -y \
         build-essential \
         git \
+        wget \
+        cuda-nvcc-11-8 \
+        libcusparse-dev-11-8 \
+        libcublas-dev-11-8 \
+        libcusolver-dev-11-8 \
     && apt-get -y autoremove \
     && apt-get clean \
     && rm -r /var/lib/apt/lists/* \

diff --git a/app/Jobs/TrackObject.php b/app/Jobs/TrackObject.php
@@ -124,11 +124,13 @@ protected function getTrackingKeyframes(VideoAnnotation $annotation)
     {
         return FileCache::get($annotation->video, function ($video, $path) use ($annotation) {
             $script = config('videos.object_tracker_script');
-
+            $checkpointUrl = config('videos.model_url');
+            $checkpointPath = config('videos.model_path');
+            $this->maybeDownloadCheckpoint($checkpointUrl, $checkpointPath);
             try {
                 $inputPath = $this->createInputJson($annotation, $path);
                 $outputPath = $this->getOutputJsonPath($annotation->id);
-                $output = $this->python("{$script} {$inputPath} {$outputPath}");
+                $output = $this->python("{$script} {$inputPath} {$outputPath} {$checkpointPath}");
                 $keyframes = json_decode(File::get($outputPath), true);
             } catch (Exception $e) {
                 $input = File::get($inputPath);
@@ -147,6 +149,44 @@ protected function getTrackingKeyframes(VideoAnnotation $annotation)
         });
     }
 
+    /**
+     * Download the model checkpoint unless it is already present.
+     *
+     * The checkpoint is written to a temporary file next to the target first and
+     * only moved into place once the transfer has finished. Otherwise an aborted
+     * download would leave a truncated file behind which passes the existence
+     * check on the next run.
+     *
+     * @param string $from Location to copy the checkpoint from.
+     * @param string $to Path to store the checkpoint at.
+     *
+     * @throws Exception If the checkpoint could not be downloaded or stored.
+     */
+    protected function maybeDownloadCheckpoint($from, $to)
+    {
+        if (File::exists($to)) {
+            return;
+        }
+
+        $directory = dirname($to);
+        if (!File::exists($directory)) {
+            File::makeDirectory($directory, 0700, true, true);
+        }
+
+        // The unique suffix keeps concurrent workers from writing to the same file.
+        $partial = $to.'.'.uniqid('', true).'.part';
+
+        if (!@copy($from, $partial)) {
+            File::delete($partial);
+            throw new Exception("Failed to download checkpoint from '{$from}'.");
+        }
+
+        if (!File::move($partial, $to)) {
+            File::delete($partial);
+            throw new Exception("Failed to store checkpoint at '{$to}'.");
+        }
+    }
+
     /**
      * Get the path to to input file for the object tracking script.
      *

diff --git a/config/services.php b/config/services.php
@@ -25,22 +25,22 @@
     ],
 
     'lifesciencelogin' => [
-       'client_id' => env('LSLOGIN_CLIENT_ID'),
-       'client_secret' => env('LSLOGIN_CLIENT_SECRET'),
-       'redirect' => '/auth/lslogin/callback',
+        'client_id' => env('LSLOGIN_CLIENT_ID'),
+        'client_secret' => env('LSLOGIN_CLIENT_SECRET'),
+        'redirect' => '/auth/lslogin/callback',
     ],
 
     'nfdilogin' => [
-       'client_id' => env('NFDILOGIN_CLIENT_ID'),
-       'client_secret' => env('NFDILOGIN_CLIENT_SECRET'),
-       'redirect' => '/auth/iam4nfdi/callback',
-   ],
-
-   'haai' => [
-       'client_id' => env('HAAI_CLIENT_ID'),
-       'client_secret' => env('HAAI_CLIENT_SECRET'),
-       'redirect' => '/auth/haai/callback',
-   ],
+        'client_id' => env('NFDILOGIN_CLIENT_ID'),
+        'client_secret' => env('NFDILOGIN_CLIENT_SECRET'),
+        'redirect' => '/auth/iam4nfdi/callback',
+    ],
+
+    'haai' => [
+        'client_id' => env('HAAI_CLIENT_ID'),
+        'client_secret' => env('HAAI_CLIENT_SECRET'),
+        'redirect' => '/auth/haai/callback',
+    ],
 
     'resend' => [
         'key' => env('RESEND_KEY'),

diff --git a/config/videos.php b/config/videos.php
@@ -15,7 +15,20 @@
     /*
     | Path to the object tracking script.
     */
-    'object_tracker_script' => __DIR__ . '/../resources/scripts/ObjectTracker.py',
+    // 'object_tracker_script' => __DIR__ . '/../resources/scripts/ObjectTracker_new.py',
+    'object_tracker_script' => __DIR__ . '/../resources/scripts/ObjectTracker_new.py',
+
+    /*
+    | URL from which to download the model checkpoint.
+    |
+    | See: https://github.com/facebookresearch/sam2#download-checkpoints
+    */
+    'model_url' => env('SAM2_MODEL_URL', 'https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_small.pt'),
+
+    /*
+    | Path to store the model checkpoint to.
+    */
+    'model_path' => storage_path('videos').'/sam_checkpoint.pt',
 
     /*
     | Distance in pixels between the annotation center positions or circle radii of two
@@ -44,7 +57,7 @@
      | Specifies which queue should be used for which job.
      */
     'process_new_video_queue' => env('VIDEOS_PROCESS_NEW_VIDEO_QUEUE', 'default'),
-    'track_object_queue' => env('VIDEOS_TRACK_OBJECT_QUEUE', 'high'),
+    'track_object_queue' => env('VIDEOS_TRACK_OBJECT_QUEUE', 'gpu'),
 
     /*
      | Specifies the maximum number of running object tracking jobs per user. If the user

diff --git a/docker-compose.yml b/docker-compose.yml
@@ -36,24 +36,31 @@ services:
     init: true
     command: "php -d memory_limit=1G artisan queue:work --queue=high,default --sleep=5 --tries=3 --timeout=0 --memory=0"
 
-  # gpu-worker:
-  #   image: biigle/gpu-worker
-  #   build:
-  #     context: ./.docker/
-  #     dockerfile: gpu-worker.dockerfile
-  #   user: ${USER_ID}:${GROUP_ID}
-  #   depends_on:
-  #     - app
-  #     - database_testing
-  #   tmpfs:
-  #     - /tmp
-  #   volumes_from:
-  #     - app
-  #   ipc: host # Required for Torch multiprocessing
-  #   environment:
-  #     - "DB_PORT=5432"
-  #   init: true
-  #   command: "php -d memory_limit=1G artisan queue:work --sleep=5 --tries=1 --timeout=0 --queue=gpu"
+  gpu-worker:
+    image: biigle/gpu-worker
+    build:
+      context: ./.docker/
+      dockerfile: gpu-worker.dockerfile
+    user: ${USER_ID}:${GROUP_ID}
+    depends_on:
+      - app
+      - database_testing
+    tmpfs:
+      - /tmp
+    volumes_from:
+      - app
+    ipc: host # Required for Torch multiprocessing
+    environment:
+      - "DB_PORT=5432"
+    init: true
+    command: "php -d memory_limit=1G artisan queue:work --sleep=5 --tries=1 --timeout=0 --queue=gpu"
+    deploy:
+      resources:
+        reservations:
+          devices:
+            - driver: nvidia
+              count: 1
+              capabilities: [gpu]
 
   websockets:
     image: quay.io/soketi/soketi:1.4-16-alpine

diff --git a/package-lock.json b/package-lock.json
diff --git a/resources/scripts/ObjectTracker_new.py b/resources/scripts/ObjectTracker_new.py
@@ -0,0 +1,150 @@
+import sys
+import numpy as np
+import torch
+import cv2
+from sam2.build_sam import build_sam2_camera_predictor
+import json
+
+# Querying device properties raises when no CUDA device is present, so check first.
+if torch.cuda.is_available() and torch.cuda.get_device_properties(0).major >= 8:
+    # turn on tfloat32 for Ampere GPUs (https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices)
+    torch.backends.cuda.matmul.allow_tf32 = True
+    torch.backends.cudnn.allow_tf32 = True
+
+# Usage: <script> <input JSON path> <output JSON path> <checkpoint path>
+sam2_checkpoint = sys.argv[3]
+model_cfg = "configs/sam2.1/sam2.1_hiera_s.yaml"
+
+class ObjectTracker(object):
+    """Iterator that follows a single object through a video using a SAM 2 predictor.
+
+    Every step yields a tuple (time, center_x, center_y, radius). The iteration ends
+    when no more frames can be read, when the predicted mask is empty or when the
+    center of the mask reaches the border of the frame.
+    """
+
+    def __init__(self, params):
+        """Open the video, seek to the start time and prompt the tracker.
+
+        params is the decoded input JSON. This method reads 'video_path',
+        'start_time' (multiplied with the FPS to get the start frame) and
+        'start_window' (used as x, y, width, height).
+
+        Raises IOError if the first frame cannot be read or the FPS is not positive.
+        """
+        self.debug = False
+        self.video = cv2.VideoCapture(params['video_path'])
+        self.fps = self.video.get(cv2.CAP_PROP_FPS)
+        self.width = self.video.get(cv2.CAP_PROP_FRAME_WIDTH)
+        self.height = self.video.get(cv2.CAP_PROP_FRAME_HEIGHT)
+
+        start_frame = round(params['start_time'] * self.fps)
+        self.video.set(cv2.CAP_PROP_POS_FRAMES, start_frame)
+
+        # Read the first frame before loading the model so an unreadable video fails fast.
+        success, frame = self.video.read()
+        if not success:
+            raise IOError('The video file could not be read: {}'.format(params['video_path']))
+        if self.fps <= 0:
+            # The frame time is computed as frame position / FPS later on.
+            raise IOError('The video frame rate could not be determined: {}'.format(params['video_path']))
+
+        self.tracker = build_sam2_camera_predictor(model_cfg, sam2_checkpoint)
+
+        window = tuple(map(int, params['start_window']))
+        self.track_width, self.track_height = window[2], window[3]
+        # Turn x, y, width, height into the corner form x1, y1, x2, y2 for the box prompt.
+        bbox = (window[0], window[1], window[0] + window[2], window[1] + window[3])
+        self.tracker.load_first_frame(frame)
+        self.tracker.add_new_prompt(frame_idx=0, obj_id=1, bbox=bbox)
+
+    def center_out_of_frame(self, center):
+        """Return True if center lies within one pixel of the frame border or beyond it."""
+        return center[0] <= 1 or center[1] <= 1 or center[0] >= self.width - 1 or center[1] >= self.height - 1
+
+    def __iter__(self):
+        return self
+
+    def __del__(self):
+        # Use getattr because __init__ may have raised before the attributes were set.
+        video = getattr(self, 'video', None)
+        if video is not None:
+            video.release()
+
+        if getattr(self, 'debug', False):
+            cv2.destroyAllWindows()
+
+    def _next(self):
+        """Advance the video and return (time, center_x, center_y, radius).
+
+        Raises StopIteration when tracking cannot continue (see the class docstring).
+        """
+        # Two frames are read per step and only the second one is passed to the
+        # tracker, i.e. every other frame is skipped.
+        for _ in range(2):
+            success, frame = self.video.read()
+
+        if not success:
+            raise StopIteration
+
+        _, mask_logits = self.tracker.track(frame)
+        mask = mask_logits[0] > 0
+        coords = np.where(mask[0].detach().cpu().numpy())
+
+        if len(coords[0]) == 0 or len(coords[1]) == 0:
+            raise StopIteration
+
+        # coords holds (row indices, column indices), so x comes from coords[1].
+        box = (np.min(coords[1]), np.min(coords[0]), np.max(coords[1]), np.max(coords[0]))
+        center = (float(box[0] + box[2]) * 0.5, float(box[1] + box[3]) * 0.5)
+
+        # Keep the radius of the initial annotation. Deriving it from the mask box
+        # instead would let the radius grow when the mask spreads to adjacent objects.
+        radius = max(self.track_width, self.track_height) * 0.5
+
+        if self.center_out_of_frame(center):
+            raise StopIteration
+
+        current_frame = self.video.get(cv2.CAP_PROP_POS_FRAMES)
+        current_time = current_frame / self.fps
+
+        if self.debug:
+            # box is given as corners, not as x, y, width, height.
+            x1, y1, x2, y2 = map(int, box)
+            cv2.rectangle(frame, (x1, y1), (x2, y2), 255, 2)
+            show_frame = cv2.resize(frame, (1024, 768))
+            cv2.imshow("frame", show_frame)
+            cv2.waitKey(1)
+
+        return (current_time, center[0], center[1], radius)
+
+    def __next__(self):
+        return self._next()
+
+    def next(self):
+        return self._next()
+
+with open(sys.argv[1]) as f:
+    params = json.load(f)
+
+current_keyframe = ()
+last_keyframe = ()
+keyframe_distance = params['keyframe_distance']
+keyframes = []
+
+def keyframes_differ(a, b):
+    """Return True if the centers or the radii of a and b differ by more than keyframe_distance."""
+    return np.sqrt(np.square(a[1] - b[1]) + np.square(a[2] - b[2])) > keyframe_distance or abs(a[3] - b[3]) > keyframe_distance
+
+for keyframe in ObjectTracker(params):
+    current_keyframe = keyframe
+    if not last_keyframe or keyframes_differ(last_keyframe, keyframe):
+        last_keyframe = keyframe
+        keyframes.append(keyframe)
+
+# Add the last keyframe even if it did not have the right keyframe distance.
+if keyframes and keyframes[-1][0] != current_keyframe[0]:
+    keyframes.append(current_keyframe)
+
+with open(sys.argv[2], 'w') as f:
+    json.dump(keyframes, f)
diff --git a/tests/php/Http/Controllers/Api/VideoAnnotationControllerTest.php b/tests/php/Http/Controllers/Api/VideoAnnotationControllerTest.php
@@ -428,7 +428,7 @@ public function testStoreAndTrackPoint()
                 'track' => true,
             ])
             ->assertSuccessful();
-        Queue::assertPushedOn('high', TrackObject::class);
+        Queue::assertPushedOn('gpu', TrackObject::class);
     }
 
     public function testStoreAndTrackRectangle()