diff --git a/.docker/gpu-worker.dockerfile b/.docker/gpu-worker.dockerfile index d1a1db052..bcf8c32ee 100644 --- a/.docker/gpu-worker.dockerfile +++ b/.docker/gpu-worker.dockerfile @@ -26,13 +26,30 @@ RUN apt-get update \ build-essential \ git \ libvips \ + wget \ + && wget -qO /tmp/cuda-keyring.deb https://developer.download.nvidia.com/compute/cuda/repos/ubuntu2204/x86_64/cuda-keyring_1.1-1_all.deb \ + && dpkg -i /tmp/cuda-keyring.deb \ + && apt-get update \ + && apt-get install -y --no-install-recommends \ + cuda-nvcc-11-8 \ + libcusparse-dev-11-8 \ + libcublas-dev-11-8 \ + libcusolver-dev-11-8 \ + && export CUDA_HOME=/usr/local/cuda \ + && export TORCH_CUDA_ARCH_LIST="7.0;7.5;8.0;8.6;8.9" \ && pip3 install --no-cache-dir -r /tmp/requirements.txt \ + && pip3 install --no-cache-dir --no-build-isolation git+https://github.com/Gy920/segment-anything-2-real-time \ # Use --no-dependencies so torch is not installed again. # Uncomment this if you have an actual GPU. # && pip3 install --no-dependencies --index-url https://download.pytorch.org/whl/cu118 xformers==0.0.23 \ && apt-get purge -y \ build-essential \ git \ + wget \ + cuda-nvcc-11-8 \ + libcusparse-dev-11-8 \ + libcublas-dev-11-8 \ + libcusolver-dev-11-8 \ && apt-get -y autoremove \ && apt-get clean \ && rm -r /var/lib/apt/lists/* \ diff --git a/app/Jobs/TrackObject.php b/app/Jobs/TrackObject.php index ca25471bb..d0ccc09ca 100644 --- a/app/Jobs/TrackObject.php +++ b/app/Jobs/TrackObject.php @@ -124,11 +124,13 @@ protected function getTrackingKeyframes(VideoAnnotation $annotation) { return FileCache::get($annotation->video, function ($video, $path) use ($annotation) { $script = config('videos.object_tracker_script'); - + $checkpointUrl = config('videos.model_url'); + $checkpointPath = config('videos.model_path'); + $this->maybeDownloadCheckpoint($checkpointUrl, $checkpointPath); try { $inputPath = $this->createInputJson($annotation, $path); $outputPath = $this->getOutputJsonPath($annotation->id); - $output = 
$this->python("{$script} {$inputPath} {$outputPath}"); + $output = $this->python("{$script} {$inputPath} {$outputPath} {$checkpointPath}"); $keyframes = json_decode(File::get($outputPath), true); } catch (Exception $e) { $input = File::get($inputPath); @@ -147,6 +149,39 @@ protected function getTrackingKeyframes(VideoAnnotation $annotation) }); } + /** + * Downloads the model checkpoint if it was not downloaded yet. + * + * The checkpoint is first written to a temporary file and only moved to its final + * path after the copy succeeded, so an aborted download never leaves a truncated + * file that later jobs would mistake for a complete checkpoint. + * + * @param string $from + * @param string $to + * + * @throws Exception If the checkpoint could not be downloaded. + */ + protected function maybeDownloadCheckpoint($from, $to) + { + if (File::exists($to)) { + return; + } + + if (!File::exists(dirname($to))) { + File::makeDirectory(dirname($to), 0700, true, true); + } + + $partialPath = "{$to}.".uniqid('', true).'.part'; + + if (!@copy($from, $partialPath)) { + File::delete($partialPath); + + throw new Exception("Failed to download checkpoint from '{$from}'."); + } + + File::move($partialPath, $to); + } + /** * Get the path to to input file for the object tracking script. * diff --git a/config/services.php b/config/services.php index dde7c0063..81ec9cbea 100644 --- a/config/services.php +++ b/config/services.php @@ -25,22 +25,22 @@ ], 'lifesciencelogin' => [ - 'client_id' => env('LSLOGIN_CLIENT_ID'), - 'client_secret' => env('LSLOGIN_CLIENT_SECRET'), - 'redirect' => '/auth/lslogin/callback', + 'client_id' => env('LSLOGIN_CLIENT_ID'), + 'client_secret' => env('LSLOGIN_CLIENT_SECRET'), + 'redirect' => '/auth/lslogin/callback', ], 'nfdilogin' => [ - 'client_id' => env('NFDILOGIN_CLIENT_ID'), - 'client_secret' => env('NFDILOGIN_CLIENT_SECRET'), - 'redirect' => '/auth/iam4nfdi/callback', - ], - - 'haai' => [ - 'client_id' => env('HAAI_CLIENT_ID'), - 'client_secret' => env('HAAI_CLIENT_SECRET'), - 'redirect' => '/auth/haai/callback', - ], + 'client_id' => env('NFDILOGIN_CLIENT_ID'), + 'client_secret' => env('NFDILOGIN_CLIENT_SECRET'), + 'redirect' => '/auth/iam4nfdi/callback', + ], + + 'haai' => [ + 'client_id' => env('HAAI_CLIENT_ID'), + 'client_secret' => env('HAAI_CLIENT_SECRET'), + 'redirect' => '/auth/haai/callback', + ], 'resend' => [ 'key' => env('RESEND_KEY'), diff --git 
a/config/videos.php b/config/videos.php index 2e3859ade..846a87e09 100644 --- a/config/videos.php +++ b/config/videos.php @@ -15,7 +15,19 @@ /* | Path to the object tracking script. */ - 'object_tracker_script' => __DIR__ . '/../resources/scripts/ObjectTracker.py', + 'object_tracker_script' => __DIR__ . '/../resources/scripts/ObjectTracker_new.py', + + /* + | URL from which to download the SAM 2 model checkpoint. + | + | See: https://github.com/facebookresearch/sam2 + */ + 'model_url' => env('SAM2_MODEL_URL', 'https://dl.fbaipublicfiles.com/segment_anything_2/092824/sam2.1_hiera_small.pt'), + + /* + | Path to store the model checkpoint to. + */ + 'model_path' => storage_path('videos').'/sam_checkpoint.pt', /* | Distance in pixels between the annotation center positions or circle radii of two @@ -44,7 +56,7 @@ | Specifies which queue should be used for which job. */ 'process_new_video_queue' => env('VIDEOS_PROCESS_NEW_VIDEO_QUEUE', 'default'), - 'track_object_queue' => env('VIDEOS_TRACK_OBJECT_QUEUE', 'high'), + 'track_object_queue' => env('VIDEOS_TRACK_OBJECT_QUEUE', 'gpu'), /* | Specifies the maximum number of running object tracking jobs per user. 
If the user diff --git a/docker-compose.yml b/docker-compose.yml index af5b815e1..b64c6abcd 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -36,24 +36,31 @@ services: init: true command: "php -d memory_limit=1G artisan queue:work --queue=high,default --sleep=5 --tries=3 --timeout=0 --memory=0" - # gpu-worker: - # image: biigle/gpu-worker - # build: - # context: ./.docker/ - # dockerfile: gpu-worker.dockerfile - # user: ${USER_ID}:${GROUP_ID} - # depends_on: - # - app - # - database_testing - # tmpfs: - # - /tmp - # volumes_from: - # - app - # ipc: host # Required for Torch multiprocessing - # environment: - # - "DB_PORT=5432" - # init: true - # command: "php -d memory_limit=1G artisan queue:work --sleep=5 --tries=1 --timeout=0 --queue=gpu" + gpu-worker: + image: biigle/gpu-worker + build: + context: ./.docker/ + dockerfile: gpu-worker.dockerfile + user: ${USER_ID}:${GROUP_ID} + depends_on: + - app + - database_testing + tmpfs: + - /tmp + volumes_from: + - app + ipc: host # Required for Torch multiprocessing + environment: + - "DB_PORT=5432" + init: true + command: "php -d memory_limit=1G artisan queue:work --sleep=5 --tries=1 --timeout=0 --queue=gpu" + deploy: + resources: + reservations: + devices: + - driver: nvidia + count: 1 + capabilities: [gpu] websockets: image: quay.io/soketi/soketi:1.4-16-alpine diff --git a/package-lock.json b/package-lock.json index 02f2c3da6..c130e8ebe 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,10 +1,9 @@ { - "name": "core", + "name": "biigle", "lockfileVersion": 3, "requires": true, "packages": { "": { - "name": "core", "license": "GPL-3.0-only", "dependencies": { "@biigle/ol": "^9.2.4", diff --git a/resources/scripts/ObjectTracker_new.py b/resources/scripts/ObjectTracker_new.py new file mode 100644 index 000000000..3fb2b1581 --- /dev/null +++ b/resources/scripts/ObjectTracker_new.py @@ -0,0 +1,128 @@ +"""Track an object through a video with the SAM 2 camera predictor. + +Usage: ObjectTracker_new.py INPUT_JSON OUTPUT_JSON CHECKPOINT +""" + +import sys +import numpy as np +import torch +import cv2 +from sam2.build_sam import 
build_sam2_camera_predictor +import json + +if torch.cuda.is_available() and torch.cuda.get_device_properties(0).major >= 8: + # turn on tfloat32 for Ampere GPUs (https://pytorch.org/docs/stable/notes/cuda.html#tensorfloat-32-tf32-on-ampere-devices) + torch.backends.cuda.matmul.allow_tf32 = True + torch.backends.cudnn.allow_tf32 = True + +sam2_checkpoint = sys.argv[3] +model_cfg = "configs/sam2.1/sam2.1_hiera_s.yaml" + +class ObjectTracker(object): + """Iterator that yields (time, center x, center y, radius) tuples for the tracked object.""" + def __init__(self, params): + self.video = cv2.VideoCapture(params['video_path']) + self.fps = self.video.get(cv2.CAP_PROP_FPS) + self.width = self.video.get(cv2.CAP_PROP_FRAME_WIDTH) + self.height = self.video.get(cv2.CAP_PROP_FRAME_HEIGHT) + self.debug = False + + start_frame = round(params['start_time'] * self.fps) + self.video.set(cv2.CAP_PROP_POS_FRAMES, start_frame) + + self.tracker = build_sam2_camera_predictor(model_cfg, sam2_checkpoint) + success, frame = self.video.read() + if not success: + raise IOError('The video file could not be read: {}'.format(params['video_path'])) + track_window = tuple(map(int, params['start_window'])) + self.track_width, self.track_height = track_window[2], track_window[3] + track_window = (track_window[0],track_window[1],track_window[0]+track_window[2],track_window[1]+track_window[3]) + # points = np.array([[track_window[0], track_window[1]]], dtype=np.float32) + # labels = np.array([1], dtype=np.int32) + self.tracker.load_first_frame(frame) + # self.tracker.add_new_prompt(frame_idx=0, obj_id = 1, points=points, labels=labels) + self.tracker.add_new_prompt(frame_idx=0, obj_id = 1, bbox=track_window) + + def center_out_of_frame(self, center): + return center[0] <= 1 or center[1] <= 1 or center[0] >= self.width - 1 or center[1] >= self.height - 1 + + def __iter__(self): + return self + + def __del__(self): + if self.video: + self.video.release() + + if self.debug: + cv2.destroyAllWindows() + + def _next(self): + """Return the next (time, x, y, radius) tuple or raise StopIteration when the video ends or the object is lost.""" + for n in 
range(2): + success, frame = self.video.read() + + if not success: + raise StopIteration + + (out_obj_ids, mask_logits) = self.tracker.track(frame) + mask = mask_logits[0] > 0 + coords = np.where(mask[0].detach().cpu().numpy()) + + if len(coords[0]) == 0 or len(coords[1]) == 0: + raise StopIteration + + box = (np.min(coords[1]),np.min(coords[0]),np.max(coords[1]),np.max(coords[0])) + + center = ((box[0] + box[2]) * 0.5, (box[1] + box[3]) * 0.5) + + # Updates radius based on tracking mask. If the mask gets larger because of adjacent objects, the radius will grow as well. + # radius = np.max((np.abs(box[0] - box[2]), np.abs(box[1] - box[3]))) * 0.5 + + # Keeps radius of initial annotation + radius = np.max((self.track_width, self.track_height)) * 0.5 + + if self.center_out_of_frame(center): + raise StopIteration + + current_frame = self.video.get(cv2.CAP_PROP_POS_FRAMES) + current_time = current_frame / self.fps + + if self.debug: + # box holds corner coordinates (x1, y1, x2, y2), not a width and height. + x1, y1, x2, y2 = list(map(int, box)) + cv2.rectangle(frame, (x1, y1), (x2, y2), 255, 2) + show_frame = cv2.resize(frame, (1024, 768)) + cv2.imshow("frame", show_frame) + cv2.waitKey(1) + + return (current_time, center[0], center[1], radius) + + def __next__(self): + return self._next() + + def next(self): + return self._next() + +with open(sys.argv[1]) as f: + params = json.load(f) + +current_keyframe = () +last_keyframe = () +keyframe_distance = params['keyframe_distance'] +keyframes = [] + +def keyframes_differ(a, b): + return np.sqrt(np.square(a[1] - b[1]) + np.square(a[2] - b[2])) > keyframe_distance or abs(a[3] - b[3]) > keyframe_distance + +for keyframe in ObjectTracker(params): + current_keyframe = keyframe + if not last_keyframe or keyframes_differ(last_keyframe, keyframe): + last_keyframe = keyframe + keyframes.append(keyframe) + +# Add the last keyframe even if it did not have the right keyframe distance. 
+if keyframes and keyframes[-1][0] != current_keyframe[0]: + keyframes.append(current_keyframe) + +with open(sys.argv[2], 'w') as f: + json.dump(keyframes, f) diff --git a/tests/php/Http/Controllers/Api/VideoAnnotationControllerTest.php b/tests/php/Http/Controllers/Api/VideoAnnotationControllerTest.php index 0aa2ce191..44504b1ed 100644 --- a/tests/php/Http/Controllers/Api/VideoAnnotationControllerTest.php +++ b/tests/php/Http/Controllers/Api/VideoAnnotationControllerTest.php @@ -428,7 +428,7 @@ public function testStoreAndTrackPoint() 'track' => true, ]) ->assertSuccessful(); - Queue::assertPushedOn('high', TrackObject::class); + Queue::assertPushedOn('gpu', TrackObject::class); } public function testStoreAndTrackRectangle()