diff --git a/.github/workflows/generate-from-spec.yml b/.github/workflows/generate-from-spec.yml
new file mode 100644
index 0000000..d093e79
--- /dev/null
+++ b/.github/workflows/generate-from-spec.yml
@@ -0,0 +1,167 @@
+name: Generate SDK from OpenAPI Spec
+
+on:
+  push:
+    paths:
+      - 'openapi.yaml'
+
+  # Manual trigger for testing
+  workflow_dispatch:
+
+jobs:
+  generate:
+    runs-on: ubuntu-latest
+    permissions:
+      contents: write
+      pull-requests: write
+
+    steps:
+      - name: Checkout Python SDK
+        uses: actions/checkout@v5
+        with:
+          fetch-depth: 0
+
+      - name: Get branch and diff info
+        id: info
+        run: |
+          # Get the branch name this workflow is running on
+          BRANCH_NAME="${GITHUB_REF#refs/heads/}"
+          echo "branch_name=$BRANCH_NAME" >> "$GITHUB_OUTPUT"
+
+          # Get commit SHAs for diff
+          BEFORE_SHA="${{ github.event.before }}"
+          AFTER_SHA="${{ github.sha }}"
+
+          # Handle new branch (all-zero SHA) and manual dispatch (empty SHA) cases
+          if [[ -z "$BEFORE_SHA" || "$BEFORE_SHA" == "0000000000000000000000000000000000000000" ]]; then
+            BEFORE_SHA=$(git rev-parse HEAD~1 2>/dev/null || echo "")
+          fi
+
+          echo "before_sha=$BEFORE_SHA" >> "$GITHUB_OUTPUT"
+          echo "after_sha=$AFTER_SHA" >> "$GITHUB_OUTPUT"
+
+          # Generate diff for the spec file
+          if [[ -n "$BEFORE_SHA" ]]; then
+            git diff "$BEFORE_SHA" "$AFTER_SHA" -- openapi.yaml > spec.diff || touch spec.diff
+          else
+            touch spec.diff
+          fi
+
+          echo "Diff size: $(wc -l < spec.diff) lines"
+          echo "Running on branch: $BRANCH_NAME"
+
+      - name: Fetch prompt and build context
+        run: |
+          # Fetch static prompt from agent-toolkit (-f: fail the step on HTTP errors instead of saving the error page)
+          curl -fsSL https://raw.githubusercontent.com/video-db/agent-toolkit/main/context/prompts/spec-to-python-sdk.txt > static_prompt.txt
+
+          # Build full prompt with dynamic content
+          cat > codex_prompt.md << 'PROMPT_EOF'
+          ## Git Diff of OpenAPI Spec Changes
+
+          The following diff shows what changed in the API specification:
+
+          ```diff
+          PROMPT_EOF
+
+          cat spec.diff >> codex_prompt.md
+
+          cat >> codex_prompt.md << 'PROMPT_EOF'
+          ```
+
+          ## Current OpenAPI Spec
+
+          If you need to reference the full spec for context, it's available at: openapi.yaml
+
+          ---
+
+          PROMPT_EOF
+
+          # Append static instructions
+          cat static_prompt.txt >> codex_prompt.md
+
+          echo "Prompt built successfully"
+
+      - name: Run Codex
+        uses: openai/codex-action@v1
+        with:
+          openai-api-key: ${{ secrets.OPENAI_API_KEY }}
+          model: o4-mini
+          sandbox: workspace-write
+          prompt-file: codex_prompt.md
+
+      - name: Check for changes and create PR
+        id: create_pr
+        env:
+          GH_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          # Configure git
+          git config user.name "github-actions[bot]"
+          git config user.email "github-actions[bot]@users.noreply.github.com"
+
+          # Clean up temporary files first so they neither count as changes nor get committed
+          rm -f spec.diff static_prompt.txt codex_prompt.md
+
+          # Check for changes; porcelain status also reports new untracked files, which git diff ignores
+          if [[ -z "$(git status --porcelain)" ]]; then
+            echo "No changes generated by Codex"
+            echo "has_changes=false" >> "$GITHUB_OUTPUT"
+            exit 0
+          fi
+
+          echo "has_changes=true" >> "$GITHUB_OUTPUT"
+
+          # Get the base branch name from the environment (never inline a branch name into the script text)
+          BASE_BRANCH="${GITHUB_REF#refs/heads/}"
+
+          # Create work branch from the current branch
+          WORK_BRANCH="auto/spec-sync-$(date +%Y%m%d-%H%M%S)"
+          git checkout -b "$WORK_BRANCH"
+          git add -A
+
+          # Commit
+          git commit -m "feat: sync with OpenAPI spec changes
+
+          Source branch: ${BASE_BRANCH}
+
+          Generated by OpenAI Codex"
+
+          # Push
+          git push origin "$WORK_BRANCH"
+
+          echo "work_branch=$WORK_BRANCH" >> "$GITHUB_OUTPUT"
+          echo "base_branch=$BASE_BRANCH" >> "$GITHUB_OUTPUT"
+
+          # Create PR targeting the original branch
+          gh pr create \
+            --base "$BASE_BRANCH" \
+            --title "feat: sync with OpenAPI spec" \
+            --body "## Summary
+
+          Automated SDK update based on OpenAPI spec changes.
+
+          **Base branch**: \`$BASE_BRANCH\`
+
+          ## Review Checklist
+
+          - [ ] Generated code follows SDK conventions
+          - [ ] Method signatures are correct
+          - [ ] No breaking changes introduced
+          - [ ] Tests pass locally
+
+          ---
+          *Generated by [OpenAI Codex](https://github.com/openai/codex)*"
+
+      - name: Trigger Node SDK Generation
+        if: steps.create_pr.outputs.has_changes == 'true'
+        uses: peter-evans/repository-dispatch@v3
+        with:
+          token: ${{ secrets.SDK_SYNC_PAT }}
+          repository: ${{ github.repository_owner }}/videodb-node
+          event-type: python-updated
+          client-payload: |
+            {
+              "source_branch": "${{ steps.create_pr.outputs.work_branch }}",
+              "target_branch": "${{ steps.create_pr.outputs.base_branch }}",
+              "trigger_type": "spec_change"
+            }
diff --git a/.github/workflows/notify-node-sdk.yml b/.github/workflows/notify-node-sdk.yml
new file mode 100644
index 0000000..e83ba3a
--- /dev/null
+++ b/.github/workflows/notify-node-sdk.yml
@@ -0,0 +1,91 @@
+name: Notify Node SDK on Python Code Changes
+
+on:
+  push:
+    paths:
+      - 'videodb/*.py'
+      - 'videodb/**/*.py'
+      - '!videodb/__about__.py'
+      - '!videodb/__init__.py'
+
+jobs:
+  notify:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout
+        uses: actions/checkout@v5
+        with:
+          fetch-depth: 0
+
+      - name: Check if spec also changed
+        id: check_spec
+        run: |
+          BEFORE_SHA="${{ github.event.before }}"
+          AFTER_SHA="${{ github.sha }}"
+
+          # Handle new branch case
+          if [[ "$BEFORE_SHA" == "0000000000000000000000000000000000000000" ]]; then
+            BEFORE_SHA=$(git rev-parse HEAD~1 2>/dev/null || echo "")
+          fi
+
+          # Check if openapi.yaml changed in this push
+          if [[ -n "$BEFORE_SHA" ]]; then
+            SPEC_CHANGED=$(git diff --name-only "$BEFORE_SHA" "$AFTER_SHA" -- openapi.yaml | wc -l)
+          else
+            SPEC_CHANGED=0
+          fi
+
+          if [[ "$SPEC_CHANGED" -gt 0 ]]; then
+            echo "spec_changed=true" >> "$GITHUB_OUTPUT"
+            echo "Spec also changed - skipping (generate-from-spec.yml will handle this)"
+          else
+            echo "spec_changed=false" >> "$GITHUB_OUTPUT"
+            echo "Only Python code changed - will notify Node SDK"
+          fi
+
+      - name: Get branch and changed files
+        if: steps.check_spec.outputs.spec_changed == 'false'
+        id: info
+        run: |
+          # Get the branch name
+          BRANCH_NAME="${GITHUB_REF#refs/heads/}"
+          echo "branch_name=$BRANCH_NAME" >> "$GITHUB_OUTPUT"
+
+          BEFORE_SHA="${{ github.event.before }}"
+          AFTER_SHA="${{ github.sha }}"
+
+          # Handle new branch case
+          if [[ "$BEFORE_SHA" == "0000000000000000000000000000000000000000" ]]; then
+            BEFORE_SHA=$(git rev-parse HEAD~1 2>/dev/null || echo "")
+          fi
+
+          # Get changed Python files (comma-separated for JSON); drop only the two files the paths filter excludes
+          if [[ -n "$BEFORE_SHA" ]]; then
+            FILES=$(git diff --name-only "$BEFORE_SHA" "$AFTER_SHA" -- 'videodb/*.py' 'videodb/**/*.py' | grep -vE '^videodb/(__about__|__init__)\.py$' | tr '\n' ',' | sed 's/,$//' || true)
+          else
+            FILES=""
+          fi
+
+          echo "changed_files=$FILES" >> "$GITHUB_OUTPUT"
+          echo "before_sha=$BEFORE_SHA" >> "$GITHUB_OUTPUT"
+          echo "after_sha=$AFTER_SHA" >> "$GITHUB_OUTPUT"
+
+          echo "Branch: $BRANCH_NAME"
+          echo "Changed files: $FILES"
+
+      - name: Trigger Node SDK Generation
+        if: steps.check_spec.outputs.spec_changed == 'false' && steps.info.outputs.changed_files != ''
+        uses: peter-evans/repository-dispatch@v3
+        with:
+          token: ${{ secrets.SDK_SYNC_PAT }}
+          repository: ${{ github.repository_owner }}/videodb-node
+          event-type: python-updated
+          client-payload: |
+            {
+              "source_branch": "${{ steps.info.outputs.branch_name }}",
+              "target_branch": "${{ steps.info.outputs.branch_name }}",
+              "trigger_type": "code_change",
+              "changed_files": "${{ steps.info.outputs.changed_files }}",
+              "before_sha": "${{ steps.info.outputs.before_sha }}",
+              "after_sha": "${{ steps.info.outputs.after_sha }}"
+            }
diff --git a/.gitignore b/.gitignore
index 8ae2cb6..cf56535 100644
--- a/.gitignore
+++ b/.gitignore
@@ -16,3 +16,7 @@ venv/
 .vscode/*
 example.ipynb
 example.py
+# test_local.py
+capture_bin/videodb_capture_bin/bin/*
+capture_bin/build/
+*.egg-info/
diff --git 
a/capture_bin/build/lib.macosx-15.5-arm64-cpython-312/videodb_capture_bin/__init__.py b/capture_bin/build/lib.macosx-15.5-arm64-cpython-312/videodb_capture_bin/__init__.py
new file mode 100644
index 0000000..2cd7cab
--- /dev/null
+++ b/capture_bin/build/lib.macosx-15.5-arm64-cpython-312/videodb_capture_bin/__init__.py
@@ -0,0 +1,24 @@
+import os
+import sys
+
+def get_binary_path():
+    """Returns the absolute path to the recorder binary."""
+    # This file is at: capture_bin/videodb_capture_bin/__init__.py
+    # Binary is at: capture_bin/videodb_capture_bin/bin/recorder (or .exe)
+    base_dir = os.path.dirname(os.path.abspath(__file__))
+    bin_dir = os.path.join(base_dir, "bin")
+
+    if sys.platform == "win32":
+        binary_name = "recorder.exe"
+    else:
+        binary_name = "recorder"
+
+    binary_path = os.path.join(bin_dir, binary_name)
+
+    if not os.path.exists(binary_path):
+        raise FileNotFoundError(
+            f"Recorder binary not found at {binary_path}. "
+            "Please ensure the package was installed correctly for your platform."
+        )
+
+    return binary_path
diff --git a/capture_bin/build/lib.macosx-15.5-arm64-cpython-312/videodb_capture_bin/bin/recorder b/capture_bin/build/lib.macosx-15.5-arm64-cpython-312/videodb_capture_bin/bin/recorder
new file mode 100755
index 0000000..392fc0c
Binary files /dev/null and b/capture_bin/build/lib.macosx-15.5-arm64-cpython-312/videodb_capture_bin/bin/recorder differ
diff --git a/capture_bin/setup.py b/capture_bin/setup.py
new file mode 100644
index 0000000..4139b12
--- /dev/null
+++ b/capture_bin/setup.py
@@ -0,0 +1,28 @@
+from setuptools import setup, Distribution
+
+class BinaryDistribution(Distribution):
+    """Forces the distribution to be platform-specific."""
+    def has_ext_modules(self):
+        # Unconditionally True: this is what makes the distribution count as platform-specific (non-pure).
+        return True
+
+VERSION = "0.2.4"
+
+setup(
+    name="videodb-capture-bin",
+    version=VERSION,
+    author="VideoDB",
+    description="Binary container for VideoDB Capture runtime",
+    packages=["videodb_capture_bin"],
+    package_data={
+        "videodb_capture_bin": ["bin/*"],
+    },
+    include_package_data=True,
+    distclass=BinaryDistribution,
+    classifiers=[
+        "Intended Audience :: Developers",
+        "Programming Language :: Python :: 3",
+        "License :: OSI Approved :: Apache Software License",
+    ],
+    python_requires=">=3.8",
+)
diff --git a/capture_bin/videodb_capture_bin.egg-info/PKG-INFO b/capture_bin/videodb_capture_bin.egg-info/PKG-INFO
new file mode 100644
index 0000000..688645d
--- /dev/null
+++ b/capture_bin/videodb_capture_bin.egg-info/PKG-INFO
@@ -0,0 +1,13 @@
+Metadata-Version: 2.4
+Name: videodb-capture-bin
+Version: 0.2.4
+Summary: Binary container for VideoDB Capture runtime
+Author: VideoDB
+Classifier: Intended Audience :: Developers
+Classifier: Programming Language :: Python :: 3
+Classifier: License :: OSI Approved :: Apache Software License
+Requires-Python: >=3.8
+Dynamic: author
+Dynamic: classifier
+Dynamic: requires-python
+Dynamic: summary
diff --git a/capture_bin/videodb_capture_bin.egg-info/SOURCES.txt b/capture_bin/videodb_capture_bin.egg-info/SOURCES.txt
new 
file mode 100644
index 0000000..4c45807
--- /dev/null
+++ b/capture_bin/videodb_capture_bin.egg-info/SOURCES.txt
@@ -0,0 +1,6 @@
+setup.py
+videodb_capture_bin/__init__.py
+videodb_capture_bin.egg-info/PKG-INFO
+videodb_capture_bin.egg-info/SOURCES.txt
+videodb_capture_bin.egg-info/dependency_links.txt
+videodb_capture_bin.egg-info/top_level.txt
\ No newline at end of file
diff --git a/capture_bin/videodb_capture_bin.egg-info/dependency_links.txt b/capture_bin/videodb_capture_bin.egg-info/dependency_links.txt
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/capture_bin/videodb_capture_bin.egg-info/dependency_links.txt
@@ -0,0 +1 @@
+
diff --git a/capture_bin/videodb_capture_bin.egg-info/top_level.txt b/capture_bin/videodb_capture_bin.egg-info/top_level.txt
new file mode 100644
index 0000000..b47fdb7
--- /dev/null
+++ b/capture_bin/videodb_capture_bin.egg-info/top_level.txt
@@ -0,0 +1 @@
+videodb_capture_bin
diff --git a/capture_bin/videodb_capture_bin/__init__.py b/capture_bin/videodb_capture_bin/__init__.py
new file mode 100644
index 0000000..2cd7cab
--- /dev/null
+++ b/capture_bin/videodb_capture_bin/__init__.py
@@ -0,0 +1,36 @@
+import os
+import sys
+
+def get_binary_path():
+    """Return the absolute path to the bundled recorder binary.
+
+    The binary is looked up in the ``bin`` directory next to this file and
+    is named ``recorder.exe`` when ``sys.platform == "win32"`` and
+    ``recorder`` on every other platform.
+
+    Returns:
+        str: Absolute path to the recorder binary.
+
+    Raises:
+        FileNotFoundError: If no regular file exists at the expected path.
+    """
+    # This file is at: capture_bin/videodb_capture_bin/__init__.py
+    # Binary is at: capture_bin/videodb_capture_bin/bin/recorder (or .exe)
+    base_dir = os.path.dirname(os.path.abspath(__file__))
+    bin_dir = os.path.join(base_dir, "bin")
+
+    if sys.platform == "win32":
+        binary_name = "recorder.exe"
+    else:
+        binary_name = "recorder"
+
+    binary_path = os.path.join(bin_dir, binary_name)
+
+    # isfile (not exists): a directory that happens to be named "recorder" is not a usable binary
+    if not os.path.isfile(binary_path):
+        raise FileNotFoundError(
+            f"Recorder binary not found at {binary_path}. "
+            "Please ensure the package was installed correctly for your platform."
+        )
+
+    return binary_path
diff --git a/openapi.yaml b/openapi.yaml
new file mode 100644
index 0000000..5a0aa0b
--- /dev/null
+++ b/openapi.yaml
@@ -0,0 +1,2806 @@
+openapi: 3.0.3
+info:
+  title: VideoDB Server API
+  description: |
+    VideoDB Server API for video, audio, and image processing with AI capabilities.
+    This API provides comprehensive video management, search, indexing, and AI-powered features.
+  version: 1.0.0
+  contact:
+    name: VideoDB Support
+    url: https://videodb.io
+  license:
+    name: MIT
+    url: https://opensource.org/licenses/MIT
+
+servers:
+  - url: https://api.videodb.io
+    description: Production server
+  - url: https://staging-api.videodb.io
+    description: Staging server
+
+security:
+  - ApiKeyAuth: []
+
+components:
+  securitySchemes:
+    ApiKeyAuth:
+      type: apiKey
+      in: header
+      name: x-access-token
+      description: API key for authentication (sk-xxx format)
+
+  schemas:
+    Error:
+      type: object
+      properties:
+        success:
+          type: boolean
+          example: false
+        message:
+          type: string
+          example: "Error message"
+        error_code:
+          type: string
+          example: "ERROR_CODE"
+
+    SuccessResponse:
+      type: object
+      properties:
+        success:
+          type: boolean
+          example: true
+        message:
+          type: string
+          example: "Operation successful"
+
+    AsyncResponse:
+      type: object
+      properties:
+        success:
+          type: boolean
+          example: true
+        status:
+          type: string
+          enum: [processing, done, failed]
+          example: "processing"
+        data:
+          type: object
+          properties:
+            id:
+              type: string
+              example: "job-123"
+            output_url:
+              type: string
+              example: "https://api.videodb.io/async-response/job-123"
+
+    User:
+      type: object
+      properties:
+        user_id:
+          type: string
+          example: "u-12345"
+        user_name:
+          type: string
+          example: "John Doe"
+        user_email:
+          type: string
+          example: "john@example.com"
+        collections:
+          type: array
+          items:
+            type: string
+          example: ["default", "c-67890"]
+        default_collection:
+          type: string
+          example: "default"
+
+    Collection:
+      type: object
+      properties:
+        id:
+          type: string
+
example: "default" + name: + type: string + example: "My Collection" + description: + type: string + example: "Collection description" + is_public: + type: boolean + example: false + owner: + type: string + example: "u-12345" + created_at: + type: string + format: date-time + + Video: + type: object + properties: + id: + type: string + example: "m-12345" + name: + type: string + example: "video.mp4" + description: + type: string + example: "Video description" + collection_id: + type: string + example: "default" + length: + type: number + example: 120.5 + size: + type: number + example: 1048576 + stream_url: + type: string + example: "https://stream.videodb.io/v/12345" + player_url: + type: string + example: "https://console.videodb.io/player/12345" + thumbnail_url: + type: string + example: "https://assets.videodb.io/thumb/12345.jpg" + created_at: + type: string + format: date-time + + Audio: + type: object + properties: + id: + type: string + example: "a-12345" + name: + type: string + example: "audio.mp3" + collection_id: + type: string + example: "default" + length: + type: number + example: 60.0 + size: + type: number + example: 524288 + created_at: + type: string + format: date-time + + Image: + type: object + properties: + id: + type: string + example: "img-12345" + name: + type: string + example: "image.jpg" + collection_id: + type: string + example: "default" + width: + type: number + example: 1920 + height: + type: number + example: 1080 + size: + type: number + example: 262144 + url: + type: string + example: "https://assets.videodb.io/img/12345.jpg" + created_at: + type: string + format: date-time + + SearchResult: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + query: + type: string + example: "search query" + results: + type: array + items: + type: object + properties: + video_id: + type: string + example: "m-12345" + start: + type: number + example: 10.5 + end: + type: number + example: 
20.3 + text: + type: string + example: "matched content" + score: + type: number + example: 0.95 + + Timeline: + type: object + properties: + video_id: + type: string + example: "m-12345" + clips: + type: array + items: + type: object + properties: + start: + type: number + example: 0 + end: + type: number + example: 30 + volume: + type: number + example: 1.0 + + BillingUsage: + type: object + properties: + credit_balance: + type: number + example: 100.50 + usage_this_month: + type: number + example: 25.75 + breakdown: + type: object + additionalProperties: + type: number + +paths: + /: + get: + summary: Get service information + description: Returns basic service information + responses: + '200': + description: Service information + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + service: + type: string + example: "VideoDB Server" + + /user: + get: + summary: Get user information + security: + - ApiKeyAuth: [] + responses: + '200': + description: User information + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/User' + '401': + description: Unauthorized + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /user/api_key: + get: + summary: Get user API keys + security: + - ApiKeyAuth: [] + responses: + '200': + description: List of API keys + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: array + items: + type: object + properties: + key: + type: string + example: "sk-xxx" + created_at: + type: string + format: date-time + + post: + summary: Create new API key + security: + - ApiKeyAuth: [] + responses: + '200': + description: API key created + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + 
data: + type: object + properties: + api_key: + type: string + example: "sk-xxx" + + /user/api_key/{api_key}: + delete: + summary: Delete API key + security: + - ApiKeyAuth: [] + parameters: + - name: api_key + in: path + required: true + schema: + type: string + example: "sk-xxx" + responses: + '200': + description: API key deleted + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + + /collection: + get: + summary: Get user collections + security: + - ApiKeyAuth: [] + responses: + '200': + description: List of collections + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + collections: + type: array + items: + $ref: '#/components/schemas/Collection' + default_collection: + type: string + example: "default" + + post: + summary: Create new collection + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + name: + type: string + example: "My New Collection" + description: + type: string + example: "Collection for my videos" + is_public: + type: boolean + example: false + responses: + '200': + description: Collection created + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/Collection' + + /collection/{collection_id}: + get: + summary: Get collection details + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + responses: + '200': + description: Collection details + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/Collection' + + patch: + summary: Update collection + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + 
schema: + type: string + example: "default" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + name: + type: string + example: "Updated Collection Name" + description: + type: string + example: "Updated description" + is_public: + type: boolean + example: true + responses: + '200': + description: Collection updated + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/Collection' + + delete: + summary: Delete collection + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + responses: + '200': + description: Collection deleted + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + + /collection/{collection_id}/upload: + post: + summary: Upload media to collection + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - url + properties: + url: + type: string + example: "https://example.com/video.mp4" + name: + type: string + example: "My Video" + media_type: + type: string + enum: [video, audio, image] + example: "video" + callback_url: + type: string + example: "https://webhook.example.com/callback" + responses: + '200': + description: Upload initiated + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/AsyncResponse' + - type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/Video' + + /collection/{collection_id}/search/: + post: + summary: Search within collection + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + requestBody: + 
required: true + content: + application/json: + schema: + type: object + required: + - query + properties: + query: + type: string + example: "search query" + index_type: + type: string + enum: [spoken_word, scene] + example: "spoken_word" + search_type: + type: string + enum: [semantic, custom] + example: "semantic" + score_threshold: + type: number + example: 0.2 + result_threshold: + type: integer + example: 10 + stitch: + type: boolean + example: true + rerank: + type: boolean + example: false + filter: + type: array + items: + type: object + responses: + '200': + description: Search results + content: + application/json: + schema: + $ref: '#/components/schemas/SearchResult' + + /video/: + get: + summary: List videos + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: query + schema: + type: string + example: "default" + - name: page_index + in: query + schema: + type: integer + example: 0 + - name: count + in: query + schema: + type: integer + maximum: 5000 + example: 50 + responses: + '200': + description: List of videos + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + videos: + type: array + items: + $ref: '#/components/schemas/Video' + + /video/{video_id}: + get: + summary: Get video details + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + - name: collection_id + in: query + schema: + type: string + example: "default" + responses: + '200': + description: Video details + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/Video' + + delete: + summary: Delete video + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + responses: + 
'200': + description: Video deleted + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + + /video/{video_id}/storage/: + delete: + summary: Delete video storage + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + responses: + '200': + description: Video storage deleted + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + + /video/{video_id}/stream/: + post: + summary: Create video stream + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + format: + type: string + enum: [mp4, webm, hls] + example: "mp4" + quality: + type: string + enum: [low, medium, high] + example: "high" + responses: + '200': + description: Stream created + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + stream_url: + type: string + example: "https://stream.videodb.io/v/12345" + + /video/{video_id}/thumbnail/: + get: + summary: Get video thumbnail + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + - name: timestamp + in: query + schema: + type: number + example: 10.5 + responses: + '200': + description: Thumbnail URL + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + thumbnail_url: + type: string + example: "https://assets.videodb.io/thumb/12345.jpg" + + post: + summary: Generate custom thumbnail + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + 
pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + timestamp: + type: number + example: 10.5 + width: + type: integer + example: 320 + height: + type: integer + example: 180 + responses: + '200': + description: Thumbnail generated + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /video/{video_id}/thumbnails/: + get: + summary: Get all video thumbnails + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + responses: + '200': + description: List of thumbnails + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: array + items: + type: object + properties: + timestamp: + type: number + example: 10.5 + url: + type: string + example: "https://assets.videodb.io/thumb/12345_10.jpg" + + /video/{video_id}/transcription/: + get: + summary: Get video transcription + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + - name: engine + in: query + schema: + type: string + default: "AAI" + example: "AAI" + - name: start + in: query + schema: + type: number + default: 0 + example: 10.5 + - name: end + in: query + schema: + type: number + default: -1 + example: 60.0 + - name: segmenter + in: query + schema: + type: string + default: "word" + example: "word" + - name: length + in: query + schema: + type: integer + default: 1 + example: 1 + responses: + '200': + description: Transcription data + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + status: + type: string + enum: [completed, processing, failed] + example: "completed" + data: + type: object + properties: + transcript: + type: array + items: + type: object + 
properties: + text: + type: string + example: "Hello world" + start: + type: number + example: 1.5 + end: + type: number + example: 3.2 + + post: + summary: Generate video transcription + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + engine: + type: string + default: "AAI" + example: "AAI" + force: + type: boolean + example: false + language_code: + type: string + example: "en-US" + callback_url: + type: string + example: "https://webhook.example.com/callback" + callback_data: + type: object + responses: + '200': + description: Transcription job started + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/AsyncResponse' + - type: object + properties: + success: + type: boolean + example: true + message: + type: string + example: "transcription already exists" + + /video/{video_id}/index/: + get: + summary: Get video index status + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + - name: index_type + in: query + required: true + schema: + type: string + enum: [spoken_word, scene] + example: "spoken_word" + - name: engine + in: query + schema: + type: string + default: "AAI" + example: "AAI" + responses: + '200': + description: Index status + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + status: + type: string + enum: [done, processing, failed] + example: "done" + message: + type: string + example: "Index is available" + + post: + summary: Create video index + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + 
application/json: + schema: + type: object + properties: + index_type: + type: string + enum: [spoken_word, scene] + default: "spoken_word" + example: "spoken_word" + engine: + type: string + default: "AAI" + example: "AAI" + force: + type: boolean + example: false + language_code: + type: string + example: "en-US" + segmentation_type: + type: string + default: "sentence" + example: "sentence" + callback_url: + type: string + example: "https://webhook.example.com/callback" + responses: + '200': + description: Index job started + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /video/{video_id}/search/: + post: + summary: Search within video + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - query + properties: + query: + type: string + example: "search query" + index_type: + type: string + enum: [spoken_word, scene] + example: "spoken_word" + search_type: + type: string + enum: [semantic, keyword] + example: "semantic" + score_threshold: + type: number + example: 0.2 + result_threshold: + type: integer + example: 10 + stitch: + type: boolean + example: true + scene_index_id: + type: string + example: "idx-12345" + filter: + type: array + items: + type: object + responses: + '200': + description: Search results + content: + application/json: + schema: + $ref: '#/components/schemas/SearchResult' + + /video/{video_id}/scenes/: + get: + summary: Get video scenes + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + responses: + '200': + description: List of scenes + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: array + items: + type: object + 
properties: + scene_id: + type: string + example: "scene-123" + start_time: + type: number + example: 10.5 + end_time: + type: number + example: 25.3 + description: + type: string + example: "Scene description" + thumbnail_url: + type: string + example: "https://assets.videodb.io/scene/123.jpg" + + post: + summary: Create video scenes + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + scene_type: + type: string + enum: [shot, time_based] + example: "shot" + callback_url: + type: string + example: "https://webhook.example.com/callback" + responses: + '200': + description: Scene creation started + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /video/{video_id}/scene/{scene_id}/describe/: + post: + summary: Describe video scene + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + - name: scene_id + in: path + required: true + schema: + type: string + example: "scene-123" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + prompt: + type: string + example: "Describe what happens in this scene" + model_name: + type: string + example: "gpt-4" + responses: + '200': + description: Scene description generated + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + description: + type: string + example: "Scene description text" + + /video/{video_id}/frame/{frame_id}/describe/: + post: + summary: Describe video frame + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + - name: 
frame_id + in: path + required: true + schema: + type: string + example: "frame-123" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + prompt: + type: string + example: "Describe this frame" + model_name: + type: string + example: "gpt-4-vision" + responses: + '200': + description: Frame description generated + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + description: + type: string + example: "Frame description text" + + /video/{video_id}/clip: + post: + summary: Generate video clip + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - prompt + properties: + prompt: + type: string + example: "Create a clip about the introduction" + content_type: + type: string + default: "spoken" + example: "spoken" + model_name: + type: string + default: "basic" + example: "basic" + scene_index_id: + type: string + example: "idx-12345" + callback_url: + type: string + example: "https://webhook.example.com/callback" + responses: + '200': + description: Clip generation started + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /video/{video_id}/workflow/: + post: + summary: Execute video workflow + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - workflow_type + properties: + workflow_type: + type: string + enum: [transcribe, index, analyze] + example: "transcribe" + config: + type: object + properties: + language: + type: string + example: "en" + model: + type: string + example: 
"gpt-4" + responses: + '200': + description: Workflow started + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /audio/: + get: + summary: List audios + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: query + schema: + type: string + example: "default" + responses: + '200': + description: List of audios + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + audios: + type: array + items: + $ref: '#/components/schemas/Audio' + + /audio/{audio_id}: + get: + summary: Get audio details + security: + - ApiKeyAuth: [] + parameters: + - name: audio_id + in: path + required: true + schema: + type: string + pattern: "^a-" + example: "a-12345" + - name: collection_id + in: query + schema: + type: string + example: "default" + responses: + '200': + description: Audio details + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/Audio' + + delete: + summary: Delete audio + security: + - ApiKeyAuth: [] + parameters: + - name: audio_id + in: path + required: true + schema: + type: string + pattern: "^a-" + example: "a-12345" + responses: + '200': + description: Audio deleted + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + + /audio/{audio_id}/generate_url: + post: + summary: Generate audio stream URL + security: + - ApiKeyAuth: [] + parameters: + - name: audio_id + in: path + required: true + schema: + type: string + pattern: "^a-" + example: "a-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + format: + type: string + enum: [mp3, wav, flac] + example: "mp3" + quality: + type: string + enum: [low, medium, high] + example: "high" + responses: + '200': + description: Stream URL generated + content: + application/json: + 
schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + stream_url: + type: string + example: "https://stream.videodb.io/a/12345" + + /image/: + get: + summary: List images + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: query + schema: + type: string + example: "default" + responses: + '200': + description: List of images + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + images: + type: array + items: + $ref: '#/components/schemas/Image' + + /image/{image_id}: + get: + summary: Get image details + security: + - ApiKeyAuth: [] + parameters: + - name: image_id + in: path + required: true + schema: + type: string + pattern: "^img-" + example: "img-12345" + - name: collection_id + in: query + schema: + type: string + example: "default" + responses: + '200': + description: Image details + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/Image' + + delete: + summary: Delete image + security: + - ApiKeyAuth: [] + parameters: + - name: image_id + in: path + required: true + schema: + type: string + pattern: "^img-" + example: "img-12345" + responses: + '200': + description: Image deleted + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + + /image/{image_id}/generate_url: + post: + summary: Generate image URL + security: + - ApiKeyAuth: [] + parameters: + - name: image_id + in: path + required: true + schema: + type: string + pattern: "^img-" + example: "img-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + format: + type: string + enum: [jpg, png, webp] + example: "jpg" + quality: + type: integer + minimum: 1 + maximum: 100 + example: 90 + width: + type: integer + example: 1024 + height: 
+ type: integer + example: 768 + responses: + '200': + description: Image URL generated + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + image_url: + type: string + example: "https://assets.videodb.io/img/12345.jpg" + + /collection/{collection_id}/generate/image/: + post: + summary: Generate image using AI + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - prompt + properties: + prompt: + type: string + example: "A beautiful sunset over mountains" + aspect_ratio: + type: string + example: "16:9" + callback_url: + type: string + example: "https://webhook.example.com/callback" + responses: + '200': + description: Image generation started + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /collection/{collection_id}/generate/video/: + post: + summary: Generate video using AI + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - prompt + properties: + prompt: + type: string + example: "A cat playing with a ball" + duration: + type: number + example: 5 + callback_url: + type: string + example: "https://webhook.example.com/callback" + responses: + '200': + description: Video generation started + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /collection/{collection_id}/generate/audio/: + post: + summary: Generate audio using AI + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + requestBody: + required: true + content: 
+ application/json: + schema: + type: object + required: + - prompt + - audio_type + properties: + prompt: + type: string + example: "Generate upbeat background music" + audio_type: + type: string + enum: [speech, sound_effect, music] + example: "music" + callback_url: + type: string + example: "https://webhook.example.com/callback" + responses: + '200': + description: Audio generation started + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /collection/{collection_id}/generate/text/: + post: + summary: Generate text using AI + security: + - ApiKeyAuth: [] + parameters: + - name: collection_id + in: path + required: true + schema: + type: string + example: "default" + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - prompt + properties: + prompt: + type: string + example: "Summarize the content of this video" + video_id: + type: string + example: "m-12345" + model_name: + type: string + example: "gpt-4" + max_tokens: + type: integer + example: 500 + temperature: + type: number + example: 0.7 + callback_url: + type: string + example: "https://webhook.example.com/callback" + responses: + '200': + description: Text generation started or completed + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/AsyncResponse' + - type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + output: + type: string + example: "Generated text content" + + /timeline: + post: + summary: Compile timeline + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - request_type + - timeline + properties: + request_type: + type: string + enum: [compile] + example: "compile" + timeline: + type: array + items: + $ref: '#/components/schemas/Timeline' + responses: + '200': + description: Timeline compilation result + content: + application/json: + 
schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + stream_url: + type: string + example: "https://stream.videodb.io/compiled/12345" + + /billing/usage: + get: + summary: Get billing usage information + security: + - ApiKeyAuth: [] + responses: + '200': + description: Billing usage data + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + $ref: '#/components/schemas/BillingUsage' + + /billing/checkout: + post: + summary: Create billing checkout session + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + mode: + type: string + enum: [payment, subscription] + example: "payment" + plan_id: + type: string + example: "plan-basic" + amount: + type: number + example: 100 + responses: + '200': + description: Checkout URL + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + url: + type: string + example: "https://checkout.stripe.com/pay/xxx" + + /billing/checkouts: + get: + summary: Get billing checkout history + security: + - ApiKeyAuth: [] + parameters: + - name: limit + in: query + schema: + type: integer + example: 10 + - name: offset + in: query + schema: + type: integer + example: 0 + responses: + '200': + description: Checkout history + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: array + items: + type: object + properties: + id: + type: string + example: "cs_test_xxx" + amount: + type: number + example: 100 + currency: + type: string + example: "usd" + status: + type: string + example: "completed" + created_at: + type: string + format: date-time + + /billing/invoices: + get: + summary: Get billing invoices + security: + - ApiKeyAuth: [] + parameters: + - name: limit + in: 
query + schema: + type: integer + example: 10 + - name: offset + in: query + schema: + type: integer + example: 0 + responses: + '200': + description: Invoice list + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: array + items: + type: object + properties: + id: + type: string + example: "in_xxx" + amount: + type: number + example: 100 + currency: + type: string + example: "usd" + status: + type: string + example: "paid" + pdf_url: + type: string + example: "https://invoice.stripe.com/pdf/xxx" + created_at: + type: string + format: date-time + + /billing/topup: + post: + summary: Create topup payment + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - amount + properties: + amount: + type: number + example: 50 + currency: + type: string + example: "usd" + responses: + '200': + description: Topup checkout URL + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + url: + type: string + example: "https://checkout.stripe.com/pay/xxx" + + /billing/auto_recharge: + get: + summary: Get auto recharge settings + security: + - ApiKeyAuth: [] + responses: + '200': + description: Auto recharge settings + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + enabled: + type: boolean + example: true + threshold: + type: number + example: 10 + amount: + type: number + example: 50 + + post: + summary: Update auto recharge settings + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + enabled: + type: boolean + example: true + threshold: + type: number + example: 10 + amount: + type: number + example: 50 + responses: + '200': + description: Auto 
recharge updated + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + message: + type: string + example: "Auto recharge settings updated" + + /async-response/{response_id}: + get: + summary: Get async operation result + parameters: + - name: response_id + in: path + required: true + schema: + type: string + example: "job-12345" + responses: + '200': + description: Operation result + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + status: + type: string + enum: [processing, done, failed] + example: "done" + data: + type: object + description: "Result data varies by operation type" + '404': + description: Response not found + content: + application/json: + schema: + $ref: '#/components/schemas/Error' + + /download: + get: + summary: List download entries + security: + - ApiKeyAuth: [] + parameters: + - name: page_index + in: query + schema: + type: integer + example: 0 + - name: count + in: query + schema: + type: integer + maximum: 5000 + example: 50 + responses: + '200': + description: List of downloads + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + downloads: + type: array + items: + type: object + properties: + id: + type: string + example: "download-12345" + name: + type: string + example: "video_download.mp4" + status: + type: string + enum: [processing, done, error] + example: "done" + created_at: + type: string + format: date-time + download_url: + type: string + example: "https://example.com/download/video.mp4" + + post: + summary: Create download request + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - stream_link + properties: + stream_link: + type: string + example: "https://stream.videodb.io/v/12345" + name: + type: string + example: 
"my_download.mp4" + responses: + '200': + description: Download initiated + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + delete: + summary: Delete download entry + security: + - ApiKeyAuth: [] + parameters: + - name: download_id + in: query + required: true + schema: + type: string + example: "download-12345" + responses: + '200': + description: Download deleted + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + + /download/{download_id}: + get: + summary: Get download status/details + security: + - ApiKeyAuth: [] + parameters: + - name: download_id + in: path + required: true + schema: + type: string + example: "download-12345" + responses: + '200': + description: Download status + content: + application/json: + schema: + oneOf: + - $ref: '#/components/schemas/AsyncResponse' + - type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + id: + type: string + example: "download-12345" + name: + type: string + example: "video_download.mp4" + status: + type: string + enum: [processing, done, error] + example: "done" + download_url: + type: string + example: "https://example.com/download/video.mp4" + created_at: + type: string + format: date-time + + post: + summary: Retry download + security: + - ApiKeyAuth: [] + parameters: + - name: download_id + in: path + required: true + schema: + type: string + example: "download-12345" + responses: + '200': + description: Download retry initiated + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /chat/completions: + post: + summary: OpenAI-compatible chat completions proxy + description: Proxy endpoint for OpenAI chat completions API with VideoDB billing + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - model + - messages + properties: + model: + type: string + enum: 
[gpt-4o-2024-11-20] + example: "gpt-4o-2024-11-20" + messages: + type: array + items: + type: object + properties: + role: + type: string + enum: [system, user, assistant] + example: "user" + content: + type: string + example: "Hello, how are you?" + max_tokens: + type: integer + example: 100 + temperature: + type: number + example: 0.7 + stream: + type: boolean + example: false + responses: + '200': + description: Chat completion response + content: + application/json: + schema: + type: object + properties: + id: + type: string + example: "chatcmpl-123" + object: + type: string + example: "chat.completion" + created: + type: integer + example: 1677652288 + model: + type: string + example: "gpt-4o-2024-11-20" + choices: + type: array + items: + type: object + properties: + index: + type: integer + example: 0 + message: + type: object + properties: + role: + type: string + example: "assistant" + content: + type: string + example: "Hello! I'm doing well, thank you for asking." + finish_reason: + type: string + example: "stop" + usage: + type: object + properties: + prompt_tokens: + type: integer + example: 10 + completion_tokens: + type: integer + example: 15 + total_tokens: + type: integer + example: 25 + + /timeline_v2: + post: + summary: Compile timeline (v2) + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - request_type + - timeline + properties: + request_type: + type: string + enum: [compile] + example: "compile" + timeline: + type: array + items: + $ref: '#/components/schemas/Timeline' + output_format: + type: string + enum: [mp4, webm, hls] + example: "mp4" + quality: + type: string + enum: [low, medium, high] + example: "high" + responses: + '200': + description: Timeline compilation result + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + stream_url: + type: string + 
example: "https://stream.videodb.io/compiled/12345" + duration: + type: number + example: 120.5 + format: + type: string + example: "mp4" + + /timeline_v2/download: + post: + summary: Download compiled timeline + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - timeline_id + properties: + timeline_id: + type: string + example: "timeline-12345" + format: + type: string + enum: [mp4, webm, avi] + example: "mp4" + quality: + type: string + enum: [low, medium, high] + example: "high" + responses: + '200': + description: Download initiated + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /compile/: + post: + summary: Compile media content + security: + - ApiKeyAuth: [] + requestBody: + required: true + content: + application/json: + schema: + type: object + required: + - inputs + properties: + inputs: + type: array + items: + type: object + properties: + media_id: + type: string + example: "m-12345" + start_time: + type: number + example: 10.0 + end_time: + type: number + example: 30.0 + output_format: + type: string + enum: [mp4, webm, hls] + example: "mp4" + callback_url: + type: string + example: "https://webhook.example.com/callback" + responses: + '200': + description: Compilation started + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /video/{video_id}/index/scene/: + get: + summary: Get video scene index status + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + responses: + '200': + description: Scene index status + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + status: + type: string + enum: [done, processing, failed] + example: "done" + data: + type: object + properties: + scene_count: + type: integer + example: 25 + 
total_duration: + type: number + example: 120.5 + last_updated: + type: string + format: date-time + + post: + summary: Create video scene index + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + requestBody: + required: true + content: + application/json: + schema: + type: object + properties: + scene_type: + type: string + enum: [shot, time_based] + example: "shot" + segmentation_threshold: + type: number + example: 0.8 + callback_url: + type: string + example: "https://webhook.example.com/callback" + responses: + '200': + description: Scene index creation started + content: + application/json: + schema: + $ref: '#/components/schemas/AsyncResponse' + + /video/{video_id}/index/scene/{scene_index_id}: + get: + summary: Get scene index details + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + - name: scene_index_id + in: path + required: true + schema: + type: string + example: "scene-idx-12345" + responses: + '200': + description: Scene index details + content: + application/json: + schema: + type: object + properties: + success: + type: boolean + example: true + data: + type: object + properties: + id: + type: string + example: "scene-idx-12345" + video_id: + type: string + example: "m-12345" + scene_type: + type: string + example: "shot" + status: + type: string + enum: [done, processing, failed] + example: "done" + scenes: + type: array + items: + type: object + properties: + start_time: + type: number + example: 10.5 + end_time: + type: number + example: 25.3 + confidence: + type: number + example: 0.85 + + delete: + summary: Delete scene index + security: + - ApiKeyAuth: [] + parameters: + - name: video_id + in: path + required: true + schema: + type: string + pattern: "^m-" + example: "m-12345" + - name: scene_index_id + in: path + required: true + 
schema: + type: string + example: "scene-idx-12345" + responses: + '200': + description: Scene index deleted + content: + application/json: + schema: + $ref: '#/components/schemas/SuccessResponse' + +tags: + - name: Authentication + description: User authentication and API key management + - name: Collections + description: Collection management operations + - name: Videos + description: Video upload, processing, and management + - name: Audio + description: Audio management operations + - name: Images + description: Image management operations + - name: Search + description: Content search and indexing + - name: AI Generation + description: AI-powered content generation + - name: Billing + description: Billing and usage management + - name: RTStream + description: Real-time streaming operations + - name: Utilities + description: Utility endpoints + - name: Meeting + description: Meeting recording and management \ No newline at end of file diff --git a/requirements.txt b/requirements.txt index 1159ddd..705f032 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ requests==2.31.0 backoff==2.2.1 tqdm==4.66.1 +websockets>=12.0 diff --git a/setup.py b/setup.py index db2f955..000db5f 100644 --- a/setup.py +++ b/setup.py @@ -28,13 +28,16 @@ long_description=long_description, long_description_content_type="text/markdown", url=about["__url__"], - packages=find_packages(exclude=["tests", "tests.*"]), + packages=find_packages(exclude=["tests", "tests.*", "capture_bin", "videodb_capture_bin"]), python_requires=">=3.8", install_requires=[ "requests>=2.25.1", "backoff>=2.2.1", "tqdm>=4.66.1", ], + extras_require={ + "capture": ["videodb-capture-bin>=0.2.4"], + }, classifiers=[ "Intended Audience :: Developers", "Programming Language :: Python :: 3", diff --git a/videodb/__about__.py b/videodb/__about__.py index 89bbe9e..e73cffb 100644 --- a/videodb/__about__.py +++ b/videodb/__about__.py @@ -2,7 +2,7 @@ -__version__ = "0.3.0" +__version__ = "0.4.0" __title__ = 
"videodb" __author__ = "videodb" __email__ = "contact@videodb.io" diff --git a/videodb/__init__.py b/videodb/__init__.py index 41244dc..c3132bc 100644 --- a/videodb/__init__.py +++ b/videodb/__init__.py @@ -20,8 +20,24 @@ ResizeMode, VideoConfig, AudioConfig, + ReframeMode, + SegmentationType, ) from videodb.client import Connection +from videodb.capture_session import CaptureSession +from videodb.websocket_client import WebSocketConnection +from videodb.capture import CaptureClient, Channel, AudioChannel, VideoChannel, Channels + +__all__ = [ + "connect", + "CaptureSession", + "WebSocketConnection", + "CaptureClient", + "Channel", + "AudioChannel", + "VideoChannel", + "Channels", +] from videodb.exceptions import ( VideodbError, AuthenticationError, @@ -51,6 +67,8 @@ "ResizeMode", "VideoConfig", "AudioConfig", + "ReframeMode", + "SegmentationType", ] diff --git a/videodb/_constants.py b/videodb/_constants.py index 0654846..4e2f157 100644 --- a/videodb/_constants.py +++ b/videodb/_constants.py @@ -12,6 +12,12 @@ class MediaType: image = "image" +class RTStreamChannelType: + mic = "mic" + screen = "screen" + system_audio = "system_audio" + + class SearchType: semantic = "semantic" keyword = "keyword" @@ -27,6 +33,7 @@ class IndexType: class SceneExtractionType: shot_based = "shot" time_based = "time" + transcript = "transcript" class Workflows: @@ -49,6 +56,11 @@ class Segmenter: sentence = "sentence" +class SegmentationType: + sentence = "sentence" + llm = "llm" + + class ApiPath: collection = "collection" upload = "upload" @@ -91,6 +103,11 @@ class ApiPath: record = "record" editor = "editor" reframe = "reframe" + clip = "clip" + capture = "capture" + session = "session" + token = "token" + websocket = "websocket" class Status: @@ -101,6 +118,7 @@ class Status: class MeetingStatus: initializing = "initializing" processing = "processing" + joined = "joined" done = "done" diff --git a/videodb/_upload.py b/videodb/_upload.py index 399d527..ebb937c 100644 --- 
a/videodb/_upload.py +++ b/videodb/_upload.py @@ -29,6 +29,7 @@ def upload( callback_url: Optional[str] = None, file_path: Optional[str] = None, url: Optional[str] = None, + collection_id: Optional[str] = None, ) -> dict: """Upload a file or URL. @@ -40,9 +41,12 @@ def upload( :param str callback_url: URL to receive the callback (optional) :param str file_path: Path to the file to be uploaded :param str url: URL of the file to be uploaded + :param str collection_id: ID of the collection to upload to (optional) :return: Dictionary containing upload response data :rtype: dict """ + collection_id = collection_id or _connection.collection_id + if source and (file_path or url): raise VideodbError("source cannot be used with file_path or url") @@ -68,7 +72,7 @@ def upload( try: name = file_path.split("/")[-1].split(".")[0] if not name else name upload_url_data = _connection.get( - path=f"{ApiPath.collection}/{_connection.collection_id}/{ApiPath.upload_url}", + path=f"{ApiPath.collection}/{collection_id}/{ApiPath.upload_url}", params={"name": name}, ) upload_url = upload_url_data.get("upload_url") @@ -85,7 +89,7 @@ def upload( raise VideodbError("Error while uploading file", cause=e) upload_data = _connection.post( - path=f"{ApiPath.collection}/{_connection.collection_id}/{ApiPath.upload}", + path=f"{ApiPath.collection}/{collection_id}/{ApiPath.upload}", data={ "url": url, "name": name, diff --git a/videodb/capture.py b/videodb/capture.py new file mode 100644 index 0000000..0c1773a --- /dev/null +++ b/videodb/capture.py @@ -0,0 +1,411 @@ +import logging +import asyncio +import json +import uuid +import os +from typing import Optional, Dict, List, Any + +from videodb._constants import VIDEO_DB_API + +logger = logging.getLogger(__name__) + +def get_recorder_path(): + """ + Attempts to find the path to the recorder binary. + If the optional 'videodb-capture-bin' package is not installed, + it raises a RuntimeError with instructions. 
+ """ + try: + import videodb_capture_bin + return videodb_capture_bin.get_binary_path() + except ImportError: + error_msg = ( + "Capture runtime not found.\n" + "To use recording features, please install the capture dependencies:\n" + "pip install 'videodb[capture]'" + ) + logger.error(error_msg) + raise RuntimeError(error_msg) + except Exception as e: + logger.error(f"Failed to resolve recorder path: {e}") + raise + + +class Channel: + """Base class for capture channels.""" + + def __init__( + self, + id: str, + name: str, + type: str, + client: Optional["CaptureClient"] = None, + ): + """Object representing a capture channel. + + :param str id: The unique ID of the channel. + :param str name: The display name of the channel. + :param str type: The type of the channel (audio/video). + :param CaptureClient client: Reference to the capture client. + """ + self.id = id + self.name = name + self.type = type + self._client = client + + def __repr__(self): + return f"Channel(id={self.id}, name={self.name}, type={self.type})" + + async def pause(self) -> None: + """Pause recording for this channel.""" + if not self._client: + raise RuntimeError("Channel not bound to a CaptureClient") + + track_map = { + "audio": "mic" if "mic" in self.id else "system_audio", + "video": "screen", + } + track = track_map.get(self.type) + if track: + await self._client._send_command("pauseTracks", {"tracks": [track]}) + + async def resume(self) -> None: + """Resume recording for this channel.""" + if not self._client: + raise RuntimeError("Channel not bound to a CaptureClient") + + track_map = { + "audio": "mic" if "mic" in self.id else "system_audio", + "video": "screen", + } + track = track_map.get(self.type) + if track: + await self._client._send_command("resumeTracks", {"tracks": [track]}) + + def to_dict(self) -> Dict[str, Any]: + """Return dictionary representation of the channel.""" + return { + "channel_id": self.id, + "type": self.type, + "name": self.name, + "record": True, + 
"store": True, + } + + +class AudioChannel(Channel): + """Represents an audio source channel.""" + + def __init__(self, id: str, name: str, client: Optional["CaptureClient"] = None): + super().__init__(id, name, type="audio", client=client) + + def __repr__(self): + return f"AudioChannel(id={self.id}, name={self.name})" + + +class VideoChannel(Channel): + """Represents a video source channel.""" + + def __init__(self, id: str, name: str, client: Optional["CaptureClient"] = None): + super().__init__(id, name, type="video", client=client) + + def __repr__(self): + return f"VideoChannel(id={self.id}, name={self.name})" + + +class Channels: + """Container for available channels, grouped by type.""" + + def __init__( + self, + mics: List[AudioChannel] = None, + displays: List[VideoChannel] = None, + system_audio: List[AudioChannel] = None, + ): + self.mics: List[AudioChannel] = mics or [] + self.displays: List[VideoChannel] = displays or [] + self.system_audio: List[AudioChannel] = system_audio or [] + + def __repr__(self): + return ( + f"Channels(" + f"mics={len(self.mics)}, " + f"displays={len(self.displays)}, " + f"system_audio={len(self.system_audio)})" + ) + + @property + def default_mic(self) -> Optional[AudioChannel]: + """Get the default microphone channel.""" + return self.mics[0] if self.mics else None + + @property + def default_display(self) -> Optional[VideoChannel]: + """Get the default display channel.""" + return self.displays[0] if self.displays else None + + @property + def default_system_audio(self) -> Optional[AudioChannel]: + """Get the default system audio channel.""" + return self.system_audio[0] if self.system_audio else None + + def all(self) -> List[Channel]: + """Return a flat list of all channels.""" + return self.mics + self.displays + self.system_audio + + +class CaptureClient: + """Client for managing local capture sessions.""" + + def __init__( + self, + upload_token: str, + base_url: Optional[str] = None, + ): + """Initialize the capture 
client. + + :param str upload_token: Upload token for the capture session. + :param str base_url: VideoDB API endpoint URL. + """ + self.upload_token = upload_token + self.base_url = base_url or os.environ.get("VIDEO_DB_API", VIDEO_DB_API) + self._session_id: Optional[str] = None + self._proc = None + self._futures: Dict[str, asyncio.Future] = {} + self._binary_path = get_recorder_path() + self._event_queue = asyncio.Queue() + + def __repr__(self) -> str: + return f"CaptureClient(base_url={self.base_url})" + + async def _ensure_process(self): + """Ensure the recorder binary is running.""" + if self._proc is not None and self._proc.returncode is None: + return + + self._proc = await asyncio.create_subprocess_exec( + self._binary_path, + stdin=asyncio.subprocess.PIPE, + stdout=asyncio.subprocess.PIPE, + stderr=asyncio.subprocess.PIPE, + ) + + asyncio.create_task(self._read_stdout_loop()) + asyncio.create_task(self._read_stderr_loop()) + + await self._send_command("init", {"apiUrl": self.base_url}) + + + async def _send_command( + self, command: str, params: Optional[Dict[str, Any]] = None + ) -> Dict[str, Any]: + """Send a command to the recorder binary and await response. + + :param str command: Command name. + :param dict params: Command parameters. + :return: Response result. 
+ :rtype: dict + """ + await self._ensure_process() + + command_id = str(uuid.uuid4()) + payload = { + "command": command, + "commandId": command_id, + "params": params or {}, + } + + # Framing: videodb_recorder|\n + message = f"videodb_recorder|{json.dumps(payload)}\n" + self._proc.stdin.write(message.encode("utf-8")) + await self._proc.stdin.drain() + + # Create future to await response + loop = asyncio.get_running_loop() + future = loop.create_future() + self._futures[command_id] = future + + try: + return await future + finally: + self._futures.pop(command_id, None) + + async def _read_stdout_loop(self): + """Loop to read stdout and process messages.""" + while True: + line = await self._proc.stdout.readline() + if not line: + break + + line_str = line.decode("utf-8").strip() + if not line_str.startswith("videodb_recorder|"): + continue + + try: + json_part = line_str.split("|", 1)[1] + data = json.loads(json_part) + + msg_type = data.get("type") + if msg_type == "response": + cmd_id = data.get("commandId") + if cmd_id in self._futures: + if data.get("status") == "success": + self._futures[cmd_id].set_result(data.get("result")) + else: + self._futures[cmd_id].set_exception( + RuntimeError(data.get("result", "Unknown error")) + ) + elif msg_type == "event": + await self._event_queue.put(data) + + except Exception as e: + logger.error(f"Failed to parse recorder message: {e}") + + async def _read_stderr_loop(self): + """Loop to read stderr and log messages.""" + while True: + line = await self._proc.stderr.readline() + if not line: + break + logger.debug(f"[Recorder Binary]: {line.decode('utf-8').strip()}") + + async def shutdown(self): + """Cleanly terminate the recorder binary process.""" + if self._proc: + try: + # Try graceful shutdown command first + await self._send_command("shutdown") + except Exception: + pass + + try: + self._proc.terminate() + await self._proc.wait() + except Exception: + pass + self._proc = None + + # Valid permission types + 
VALID_PERMISSIONS = {"microphone", "screen_capture"} + + async def request_permission(self, kind: str) -> bool: + """Request system permissions. + + :param str kind: One of "microphone", "screen_capture" + :return: True if granted, False if denied + :raises ValueError: If kind is not a valid permission type + """ + # Validate permission type + if kind not in self.VALID_PERMISSIONS: + raise ValueError( + f"Invalid permission type: '{kind}'. " + f"Valid types: {', '.join(sorted(self.VALID_PERMISSIONS))}" + ) + + # Map python-friendly names to binary-expected names + # e.g. "screen_capture" -> "screen-capture" + permission_map = { + "screen_capture": "screen-capture", + } + binary_kind = permission_map.get(kind, kind) + result = await self._send_command("requestPermission", {"permission": binary_kind}) + + # Binary returns {"requested": True} to confirm the request was initiated + # or may return {"status": "granted"} if already granted. + if result.get("requested") is True: + return True + + status = result.get("status") + if status == "granted": + return True + elif status == "denied": + logger.warning(f"Permission '{kind}' was denied.") + return False + + return False + + async def list_channels(self) -> Channels: + """Query the system for available audio and video channels. + + :return: Channels object with grouped collections (mics, displays, system_audio). 
+ :rtype: Channels + """ + response = await self._send_command("getChannels") + raw_channels = response.get("channels", []) + + mics = [] + displays = [] + system_audio = [] + + for ch in raw_channels: + c_type = ch.get("type") + c_id = ch.get("channel_id") or ch.get("id") + c_name = ch.get("name", "") + + if not c_id: + logger.warning(f"Skipping channel with missing ID: {ch}") + continue + + # Categorize based on type and name patterns + if c_type == "video": + displays.append(VideoChannel(id=c_id, name=c_name, client=self)) + elif c_type == "audio": + # Distinguish between mic and system audio based on common patterns + name_lower = c_name.lower() + if "system" in name_lower or "output" in name_lower or "speaker" in name_lower: + system_audio.append(AudioChannel(id=c_id, name=c_name, client=self)) + else: + mics.append(AudioChannel(id=c_id, name=c_name, client=self)) + else: + logger.debug(f"Unknown channel type '{c_type}' for channel '{c_name}'") + + return Channels(mics=mics, displays=displays, system_audio=system_audio) + + async def start_capture_session( + self, + capture_session_id: str, + channels: List[Channel], + primary_video_channel_id: Optional[str] = None, + ) -> None: + """Start the recording session. + + :param str capture_session_id: The ID of the capture session. + :param list[Channel] channels: List of Channel objects to record. + :param str primary_video_channel_id: ID of the primary video channel. + :raises ValueError: If no channels are specified. 
+ """ + if not channels: + raise ValueError("At least one channel must be specified for capture.") + + self._session_id = capture_session_id + + payload = { + "sessionId": capture_session_id, + "uploadToken": self.upload_token, + "channels": [ch.to_dict() for ch in channels], + } + + if primary_video_channel_id: + payload["primary_video_channel_id"] = primary_video_channel_id + + await self._send_command("startRecording", payload) + + async def stop_capture(self) -> None: + """Stop the current recording session.""" + if not self._session_id: + raise RuntimeError("No active capture session to stop.") + await self._send_command("stopRecording", {"sessionId": self._session_id}) + + async def events(self): + """Async generator that yields events from the recorder.""" + while True: + try: + # Use a timeout so we can check if the process is still alive + event = await asyncio.wait_for(self._event_queue.get(), timeout=1.0) + yield event + except asyncio.TimeoutError: + if self._proc is None or self._proc.returncode is not None: + break + continue + except Exception: + break diff --git a/videodb/capture_session.py b/videodb/capture_session.py new file mode 100644 index 0000000..12671b9 --- /dev/null +++ b/videodb/capture_session.py @@ -0,0 +1,61 @@ +from typing import List +from videodb.rtstream import RTStream + + +class CaptureSession: + """CaptureSession class representing a capture session. 
+ + :ivar str id: Unique identifier for the session + :ivar str collection_id: ID of the collection this session belongs to + :ivar str end_user_id: ID of the end user + :ivar str client_id: Client-provided session ID + :ivar str status: Current status of the session + """ + + def __init__(self, _connection, id: str, collection_id: str, **kwargs) -> None: + self._connection = _connection + self.id = id + self.collection_id = collection_id + self._update_attributes(kwargs) + + def __repr__(self) -> str: + return ( + f"CaptureSession(" + f"id={self.id}, " + f"status={getattr(self, 'status', None)}, " + f"collection_id={self.collection_id}, " + f"end_user_id={getattr(self, 'end_user_id', None)})" + ) + + def _update_attributes(self, data: dict) -> None: + """Update instance attributes from API response data.""" + self.end_user_id = data.get("end_user_id") + self.client_id = data.get("client_id") + self.status = data.get("status") + self.callback_url = data.get("callback_url") + self.exported_video_id = data.get("exported_video_id") + self.metadata = data.get("metadata", {}) + + self.rtstreams = [] + for rts_data in data.get("rtstreams", []): + if not isinstance(rts_data, dict): + continue + stream = RTStream(self._connection, **rts_data) + self.rtstreams.append(stream) + + def get_rtstream(self, category: str) -> List[RTStream]: + """Get list of RTStreams by category. + + :param str category: Category to filter by. Use :class:`RTStreamChannelType` constants: + ``RTStreamChannelType.mic``, ``RTStreamChannelType.screen``, ``RTStreamChannelType.system_audio``. 
+ :return: List of :class:`RTStream ` objects + :rtype: List[:class:`videodb.rtstream.RTStream`] + """ + filtered_streams = [] + + for stream in self.rtstreams: + channel_id = getattr(stream, "channel_id", "") or "" + if str(channel_id).lower() == category.lower(): + filtered_streams.append(stream) + + return filtered_streams diff --git a/videodb/client.py b/videodb/client.py index a01d10c..40351c9 100644 --- a/videodb/client.py +++ b/videodb/client.py @@ -19,6 +19,8 @@ from videodb.audio import Audio from videodb.image import Image from videodb.meeting import Meeting +from videodb.capture_session import CaptureSession +from videodb.websocket_client import WebSocketConnection from videodb._upload import ( upload, @@ -347,3 +349,143 @@ def get_meeting(self, meeting_id: str) -> Meeting: meeting = Meeting(self, id=meeting_id, collection_id="default") meeting.refresh() return meeting + + def create_capture_session( + self, + end_user_id: str, + collection_id: str = "default", + callback_url: str = None, + ws_connection_id: str = None, + metadata: dict = None, + ) -> CaptureSession: + """Create a capture session. 
+ + :param str end_user_id: ID of the end user + :param str collection_id: ID of the collection (default: "default") + :param str callback_url: URL to receive callback (optional) + :param str ws_connection_id: WebSocket connection ID (optional) + :param dict metadata: Custom metadata (optional) + :return: :class:`CaptureSession ` object + :rtype: :class:`videodb.capture_session.CaptureSession` + """ + data = { + "end_user_id": end_user_id, + } + if callback_url: + data["callback_url"] = callback_url + if ws_connection_id: + data["ws_connection_id"] = ws_connection_id + if metadata: + data["metadata"] = metadata + + response = self.post( + path=f"{ApiPath.collection}/{collection_id}/{ApiPath.capture}/{ApiPath.session}", + data=data, + ) + # Extract id and collection_id from response to avoid duplicate arguments + session_id = response.pop("session_id", None) or response.pop("id", None) + response_collection_id = response.pop("collection_id", collection_id) + return CaptureSession( + self, id=session_id, collection_id=response_collection_id, **response + ) + + def get_capture_session( + self, session_id: str, collection_id: str = "default" + ) -> CaptureSession: + """Get a capture session by its ID. 
+ + :param str session_id: ID of the capture session + :param str collection_id: ID of the collection (default: "default") + :return: :class:`CaptureSession ` object + :rtype: :class:`videodb.capture_session.CaptureSession` + """ + response = self.get( + path=f"{ApiPath.collection}/{collection_id}/{ApiPath.capture}/{ApiPath.session}/{session_id}" + ) + + # If response is wrapped in 'data', extract it + if "data" in response and isinstance(response["data"], dict): + response = response["data"] + + # Normalize rtstreams before passing to CaptureSession + for rts in response.get("rtstreams", []): + if isinstance(rts, dict): + if "rtstream_id" in rts and "id" not in rts: + rts["id"] = rts.pop("rtstream_id") + if "collection_id" not in rts: + rts["collection_id"] = collection_id + + # Extract id and collection_id from response to avoid duplicate arguments + response.pop("id", None) # Remove id from response + response.pop("collection_id", None) # Remove collection_id from response + + return CaptureSession( + self, id=session_id, collection_id=collection_id, **response + ) + + def list_capture_sessions( + self, + collection_id: str = "default", + status: str = None, + ) -> list[CaptureSession]: + """List capture sessions. 
+ + :param str collection_id: ID of the collection (default: "default") + :param str status: Filter sessions by status (optional) + :return: List of :class:`CaptureSession ` objects + :rtype: list[:class:`videodb.capture_session.CaptureSession`] + """ + params = {} + if status: + params["status"] = status + + response = self.get( + path=f"{ApiPath.collection}/{collection_id}/{ApiPath.capture}/{ApiPath.session}", + params=params, + ) + + sessions = [] + for session_data in response.get("sessions", []): + session_id = session_data.pop("id", None) or session_data.pop( + "session_id", None + ) + # Normalize rtstreams + for rts in session_data.get("rtstreams", []): + if isinstance(rts, dict): + if "rtstream_id" in rts and "id" not in rts: + rts["id"] = rts.pop("rtstream_id") + if "collection_id" not in rts: + rts["collection_id"] = collection_id + # Remove collection_id from data + session_data.pop("collection_id", None) + sessions.append( + CaptureSession( + self, id=session_id, collection_id=collection_id, **session_data + ) + ) + return sessions + + def connect_websocket(self, collection_id: str = "default") -> WebSocketConnection: + """Connect to the VideoDB WebSocket service. + + :param str collection_id: ID of the collection (default: "default") + :return: :class:`WebSocketConnection ` object + :rtype: :class:`videodb.websocket_client.WebSocketConnection` + """ + path = f"{ApiPath.collection}/{collection_id}/{ApiPath.websocket}" + response = self.get(path=path) + websocket_url = response.get("websocket_url") + return WebSocketConnection(url=websocket_url) + + def generate_client_token(self, expires_in: int = 86400) -> str: + """Generate a client token for capture operations. 
+ + :param int expires_in: Expiration time in seconds (default: 86400) + :return: Client token string + :rtype: str + """ + response = self.post( + path=f"{ApiPath.capture}/{ApiPath.session}/{ApiPath.token}", + data={"expires_in": expires_in}, + ) + return response.get("token") diff --git a/videodb/collection.py b/videodb/collection.py index 994cda6..77b7acc 100644 --- a/videodb/collection.py +++ b/videodb/collection.py @@ -13,7 +13,8 @@ from videodb.audio import Audio from videodb.image import Image from videodb.meeting import Meeting -from videodb.rtstream import RTStream +from videodb.capture_session import CaptureSession +from videodb.rtstream import RTStream, RTStreamSearchResult, RTStreamShot from videodb.search import SearchFactory, SearchResult logger = logging.getLogger(__name__) @@ -167,23 +168,45 @@ def delete_image(self, image_id: str) -> None: ) def connect_rtstream( - self, url: str, name: str, sample_rate: int = None + self, + url: str, + name: str, + sample_rate: int = None, + video: bool = None, + audio: bool = None, + enable_transcript: bool = None, + ws_connection_id: str = None, ) -> RTStream: """Connect to an rtstream. 
:param str url: URL of the rtstream :param str name: Name of the rtstream - :param int sample_rate: Sample rate of the rtstream (optional) + :param int sample_rate: Sample rate of the rtstream (optional, server default: 30) + :param bool video: Enable video streaming (optional, server default: True) + :param bool audio: Enable audio streaming (optional, server default: False) + :param bool enable_transcript: Enable real-time transcription (optional) + :param str ws_connection_id: WebSocket connection ID for receiving events (optional) :return: :class:`RTStream ` object """ + data = { + "collection_id": self.id, + "url": url, + "name": name, + } + if sample_rate is not None: + data["sample_rate"] = sample_rate + if video is not None: + data["video"] = video + if audio is not None: + data["audio"] = audio + if enable_transcript is not None: + data["enable_transcript"] = enable_transcript + if ws_connection_id is not None: + data["ws_connection_id"] = ws_connection_id + rtstream_data = self._connection.post( path=f"{ApiPath.rtstream}", - data={ - "collection_id": self.id, - "url": url, - "name": name, - "sample_rate": sample_rate, - }, + data=data, ) return RTStream(self._connection, **rtstream_data) @@ -199,14 +222,34 @@ def get_rtstream(self, id: str) -> RTStream: ) return RTStream(self._connection, **rtstream_data) - def list_rtstreams(self) -> List[RTStream]: + def list_rtstreams( + self, + limit: Optional[int] = None, + offset: Optional[int] = None, + status: Optional[str] = None, + name: Optional[str] = None, + ordering: Optional[str] = None, + ) -> List[RTStream]: """List all rtstreams in the collection. 
+ :param int limit: Number of rtstreams to return (optional) + :param int offset: Number of rtstreams to skip (optional) + :param str status: Filter by status (optional) + :param str name: Filter by name (optional) + :param str ordering: Order results by field (optional) :return: List of :class:`RTStream ` objects :rtype: List[:class:`videodb.rtstream.RTStream`] """ + params = { + "limit": limit, + "offset": offset, + "status": status, + "name": name, + "ordering": ordering, + } rtstreams_data = self._connection.get( path=f"{ApiPath.rtstream}", + params={key: value for key, value in params.items() if value is not None}, ) return [ RTStream(self._connection, **rtstream) @@ -413,7 +456,9 @@ def search( score_threshold: Optional[float] = None, dynamic_score_percentage: Optional[float] = None, filter: List[Dict[str, Any]] = [], - ) -> SearchResult: + namespace: Optional[str] = None, + scene_index_id: Optional[str] = None, + ) -> Union[SearchResult, RTStreamSearchResult]: """Search for a query in the collection. 
:param str query: Query to search for @@ -422,10 +467,50 @@ def search( :param int result_threshold: Number of results to return (optional) :param float score_threshold: Threshold score for the search (optional) :param float dynamic_score_percentage: Percentage of dynamic score to consider (optional) + :param list filter: Additional metadata filters (optional) + :param str namespace: Search namespace (optional, "rtstream" to search RTStreams) + :param str scene_index_id: Filter by specific scene index (optional) :raise SearchError: If the search fails - :return: :class:`SearchResult ` object - :rtype: :class:`videodb.search.SearchResult` + :return: :class:`SearchResult ` or + :class:`RTStreamSearchResult ` object + :rtype: Union[:class:`videodb.search.SearchResult`, + :class:`videodb.rtstream.RTStreamSearchResult`] """ + if namespace == "rtstream": + data = {"query": query} + if scene_index_id is not None: + data["scene_index_id"] = scene_index_id + if result_threshold is not None: + data["result_threshold"] = result_threshold + if score_threshold is not None: + data["score_threshold"] = score_threshold + if dynamic_score_percentage is not None: + data["dynamic_score_percentage"] = dynamic_score_percentage + if filter is not None: + data["filter"] = filter + + search_data = self._connection.post( + path=f"{ApiPath.rtstream}/{ApiPath.collection}/{self.id}/{ApiPath.search}", + data=data, + ) + results = search_data.get("results", []) + shots = [ + RTStreamShot( + _connection=self._connection, + rtstream_id=result.get("rtstream_id") or result.get("id"), + rtstream_name=result.get("rtstream_name"), + start=result.get("start"), + end=result.get("end"), + text=result.get("text"), + search_score=result.get("score"), + scene_index_id=result.get("scene_index_id"), + scene_index_name=result.get("scene_index_name"), + metadata=result.get("metadata"), + ) + for result in results + ] + return RTStreamSearchResult(collection_id=self.id, shots=shots) + search = 
SearchFactory(self._connection).get_search(search_type) return search.search_inside_collection( collection_id=self.id, @@ -482,6 +567,7 @@ def upload( callback_url=callback_url, file_path=file_path, url=url, + collection_id=self.id, ) media_id = upload_data.get("id", "") if media_id.startswith("m-"): @@ -565,3 +651,111 @@ def get_meeting(self, meeting_id: str) -> Meeting: meeting = Meeting(self._connection, id=meeting_id, collection_id=self.id) meeting.refresh() return meeting + + def create_capture_session( + self, + end_user_id: str, + callback_url: str = None, + ws_connection_id: str = None, + metadata: dict = None, + ) -> "CaptureSession": + """Create a capture session. + + :param str end_user_id: ID of the end user + :param str callback_url: URL to receive callback (optional) + :param str ws_connection_id: WebSocket connection ID (optional) + :param dict metadata: Custom metadata (optional) + :return: :class:`CaptureSession ` object + :rtype: :class:`videodb.capture_session.CaptureSession` + """ + data = { + "end_user_id": end_user_id, + } + if callback_url: + data["callback_url"] = callback_url + if ws_connection_id: + data["ws_connection_id"] = ws_connection_id + if metadata: + data["metadata"] = metadata + + response = self._connection.post( + path=f"{ApiPath.collection}/{self.id}/{ApiPath.capture}/{ApiPath.session}", + data=data, + ) + # Normalize rtstreams before passing to CaptureSession + for rts in response.get("rtstreams", []): + if isinstance(rts, dict): + if "rtstream_id" in rts and "id" not in rts: + rts["id"] = rts.pop("rtstream_id") + if "collection_id" not in rts: + rts["collection_id"] = self.id + # Extract id and collection_id from response to avoid duplicate arguments + session_id = response.pop("session_id", None) or response.pop("id", None) + response.pop("collection_id", None) + return CaptureSession( + self._connection, id=session_id, collection_id=self.id, **response + ) + + def get_capture_session(self, session_id: str) -> 
"CaptureSession": + """Get a capture session by its ID. + + :param str session_id: ID of the capture session + :return: :class:`CaptureSession ` object + :rtype: :class:`videodb.capture_session.CaptureSession` + """ + response = self._connection.get( + path=f"{ApiPath.collection}/{self.id}/{ApiPath.capture}/{ApiPath.session}/{session_id}" + ) + # Normalize rtstreams before passing to CaptureSession + for rts in response.get("rtstreams", []): + if isinstance(rts, dict): + if "rtstream_id" in rts and "id" not in rts: + rts["id"] = rts.pop("rtstream_id") + if "collection_id" not in rts: + rts["collection_id"] = self.id + # Extract id and collection_id from response to avoid duplicate arguments + response.pop("id", None) + response.pop("collection_id", None) + return CaptureSession( + self._connection, id=session_id, collection_id=self.id, **response + ) + + def list_capture_sessions(self, status: str = None) -> list["CaptureSession"]: + """List capture sessions. + + :param str status: Filter sessions by status (optional) + :return: List of :class:`CaptureSession ` objects + :rtype: list[:class:`videodb.capture_session.CaptureSession`] + """ + params = {} + if status: + params["status"] = status + + response = self._connection.get( + path=f"{ApiPath.collection}/{self.id}/{ApiPath.capture}/{ApiPath.session}", + params=params, + ) + + sessions = [] + for session_data in response.get("sessions", []): + session_id = session_data.pop("id", None) or session_data.pop( + "session_id", None + ) + # Normalize rtstreams + for rts in session_data.get("rtstreams", []): + if isinstance(rts, dict): + if "rtstream_id" in rts and "id" not in rts: + rts["id"] = rts.pop("rtstream_id") + if "collection_id" not in rts: + rts["collection_id"] = self.id + # Remove collection_id from data + session_data.pop("collection_id", None) + sessions.append( + CaptureSession( + self._connection, + id=session_id, + collection_id=self.id, + **session_data, + ) + ) + return sessions diff --git 
a/videodb/editor.py b/videodb/editor.py index 4c3ae25..5c40282 100644 --- a/videodb/editor.py +++ b/videodb/editor.py @@ -1,7 +1,14 @@ +import json +import requests + from typing import List, Optional, Union from enum import Enum from videodb._constants import ApiPath +from videodb.exceptions import InvalidRequestError + + +MAX_PAYLOAD_SIZE = 100 * 1024 class AssetType(str, Enum): @@ -349,7 +356,6 @@ class Font: :ivar int size: Font size in pixels :ivar str color: Font color in hex format (e.g., "#FFFFFF") :ivar float opacity: Font opacity (0.0 to 1.0) - :ivar int weight: (optional) Font weight (100 to 900) """ def __init__( @@ -358,7 +364,6 @@ def __init__( size: int = 48, color: str = "#FFFFFF", opacity: float = 1.0, - weight: Optional[int] = None, ): """Initialize a Font instance. @@ -366,21 +371,17 @@ def __init__( :param int size: Font size in pixels (default: 48) :param str color: Font color in hex format (default: "#FFFFFF") :param float opacity: Font opacity between 0.0 and 1.0 (default: 1.0) - :param int weight: (optional) Font weight between 100 and 900 - :raises ValueError: If size < 1, opacity not in [0.0, 1.0], or weight not in [100, 900] + :raises ValueError: If size < 1, opacity not in [0.0, 1.0] """ if size < 1: raise ValueError("size must be at least 1") if not (0.0 <= opacity <= 1.0): raise ValueError("opacity must be between 0.0 and 1.0") - if weight is not None and not (100 <= weight <= 900): - raise ValueError("weight must be between 100 and 900") self.family = family self.size = size self.color = color self.opacity = opacity - self.weight = weight def to_json(self) -> dict: """Convert the font settings to a JSON-serializable dictionary. @@ -394,8 +395,6 @@ def to_json(self) -> dict: "color": self.color, "opacity": self.opacity, } - if self.weight is not None: - data["weight"] = self.weight return data @@ -1100,17 +1099,61 @@ def generate_stream(self) -> str: Makes an API request to render the timeline and generate streaming URLs. 
Updates the stream_url and player_url instance variables. + If the timeline data exceeds the max payload size, it will be uploaded + as a file first to avoid HTTP content length limits. + :return: The stream URL of the generated video :rtype: str """ - stream_data = self.connection.post( - path=ApiPath.editor, - data=self.to_json(), - ) + timeline_data = self.to_json() + json_str = json.dumps(timeline_data) + payload_size = len(json_str.encode("utf-8")) + + if payload_size > MAX_PAYLOAD_SIZE: + # Upload timeline data as a file to avoid HTTP content length limits + timeline_url = self._upload_timeline_data(json_str) + stream_data = self.connection.post( + path=ApiPath.editor, + data={"timeline_url": timeline_url}, + ) + else: + stream_data = self.connection.post( + path=ApiPath.editor, + data=timeline_data, + ) + self.stream_url = stream_data.get("stream_url") self.player_url = stream_data.get("player_url") return stream_data.get("stream_url", None) + def _upload_timeline_data(self, json_str: str) -> str: + """Upload timeline JSON data as a file and return the URL. + + :param str json_str: The JSON string of timeline data to upload + :return: The URL of the uploaded file + :rtype: str + :raises InvalidRequestError: If upload fails + """ + # Get a presigned upload URL + upload_url_data = self.connection.get( + path=f"{ApiPath.collection}/{self.connection.collection_id}/{ApiPath.upload_url}", + params={"name": "timeline_data.json"}, + ) + upload_url = upload_url_data.get("upload_url") + + # Upload the JSON data as a file + try: + files = {"file": ("timeline_data.json", json_str, "application/json")} + response = requests.post(upload_url, files=files) + response.raise_for_status() + except requests.exceptions.RequestException as e: + raise InvalidRequestError( + f"Failed to upload timeline data: {str(e)}", + getattr(e, "response", None), + ) from None + + return upload_url + def download_stream(self, stream_url: str) -> dict: """Download a stream from the timeline. 
diff --git a/videodb/meeting.py b/videodb/meeting.py index 827f5c6..6b4e0cb 100644 --- a/videodb/meeting.py +++ b/videodb/meeting.py @@ -29,7 +29,15 @@ def __init__(self, _connection, id: str, collection_id: str, **kwargs) -> None: self._update_attributes(kwargs) def __repr__(self) -> str: - return f"Meeting(id={self.id}, collection_id={self.collection_id}, meeting_title={self.meeting_title}, status={self.status}, bot_name={self.bot_name}, meeting_url={self.meeting_url})" + return ( + f"Meeting(" + f"id={self.id}, " + f"collection_id={self.collection_id}, " + f"meeting_title={self.meeting_title}, " + f"status={self.status}, " + f"bot_name={self.bot_name}, " + f"meeting_url={self.meeting_url})" + ) def _update_attributes(self, data: dict) -> None: """Update instance attributes from API response data. diff --git a/videodb/rtstream.py b/videodb/rtstream.py index 4be4a8c..aaddac7 100644 --- a/videodb/rtstream.py +++ b/videodb/rtstream.py @@ -1,7 +1,130 @@ +from typing import Optional, List, Dict, Any + from videodb._constants import ( ApiPath, SceneExtractionType, + Segmenter, ) +from videodb._utils._video import play_stream + + +class RTStreamSearchResult: + """RTStreamSearchResult class to interact with rtstream search results + + :ivar str collection_id: ID of the collection this rtstream belongs to + :ivar List[RTStreamShot] shots: List of shots in the search result + """ + + def __init__( + self, + collection_id: str, + shots: List["RTStreamShot"], + ) -> None: + self.collection_id = collection_id + self.shots = shots + + def __repr__(self) -> str: + return ( + f"RTStreamSearchResult(" + f"collection_id={self.collection_id}, " + f"shots={len(self.shots)})" + ) + + def get_shots(self) -> List["RTStreamShot"]: + """Get the list of shots from the search result. 
+ + :return: List of :class:`RTStreamShot ` objects + :rtype: List[:class:`videodb.rtstream.RTStreamShot`] + """ + return self.shots + + +class RTStreamShot: + """RTStreamShot class for rtstream search results + + :ivar str rtstream_id: ID of the rtstream + :ivar str rtstream_name: Name of the rtstream + :ivar float start: Start time in Unix timestamp + :ivar float end: End time in Unix timestamp + :ivar str text: Text content of the shot + :ivar float search_score: Search relevance score + :ivar str scene_index_id: ID of the scene index (optional) + :ivar str scene_index_name: Name of the scene index (optional) + :ivar dict metadata: Additional metadata (optional) + :ivar str stream_url: URL to stream the shot + :ivar str player_url: URL to play the shot in a player + """ + + def __init__( + self, + _connection, + rtstream_id: str, + start: float, + end: float, + rtstream_name: Optional[str] = None, + text: Optional[str] = None, + search_score: Optional[float] = None, + scene_index_id: Optional[str] = None, + scene_index_name: Optional[str] = None, + metadata: Optional[dict] = None, + ) -> None: + self._connection = _connection + self.rtstream_id = rtstream_id + self.rtstream_name = rtstream_name + self.start = start + self.end = end + self.text = text + self.search_score = search_score + self.scene_index_id = scene_index_id + self.scene_index_name = scene_index_name + self.metadata = metadata + self.stream_url = None + self.player_url = None + + def __repr__(self) -> str: + repr_str = ( + f"RTStreamShot(" + f"rtstream_id={self.rtstream_id}, " + f"rtstream_name={self.rtstream_name}, " + f"start={self.start}, " + f"end={self.end}, " + f"text={self.text}, " + f"search_score={self.search_score}" + ) + if self.scene_index_id: + repr_str += f", scene_index_id={self.scene_index_id}" + if self.scene_index_name: + repr_str += f", scene_index_name={self.scene_index_name}" + if self.metadata: + repr_str += f", metadata={self.metadata}" + repr_str += ")" + return repr_str + 
+ def generate_stream(self) -> str: + """Generate a stream url for the shot. + + :return: The stream url + :rtype: str + """ + if self.stream_url: + return self.stream_url + + stream_data = self._connection.get( + f"{ApiPath.rtstream}/{self.rtstream_id}/{ApiPath.stream}", + params={"start": int(self.start), "end": int(self.end)}, + ) + self.stream_url = stream_data.get("stream_url") + self.player_url = stream_data.get("player_url") + return self.stream_url + + def play(self) -> str: + """Generate a stream url for the shot and open it in the default browser. + + :return: The stream url + :rtype: str + """ + self.generate_stream() + return play_stream(self.stream_url) class RTStreamSceneIndex: @@ -90,20 +213,24 @@ def stop(self): ) self.status = "stopped" - def create_alert(self, event_id, callback_url) -> str: + def create_alert(self, event_id, callback_url, ws_connection_id=None) -> str: """Create an event alert. :param str event_id: ID of the event :param str callback_url: URL to receive the alert callback + :param str ws_connection_id: WebSocket connection ID for real-time alerts :return: Alert ID :rtype: str """ + data = { + "event_id": event_id, + "callback_url": callback_url, + } + if ws_connection_id: + data["ws_connection_id"] = ws_connection_id alert_data = self._connection.post( f"{ApiPath.rtstream}/{self.rtstream_id}/{ApiPath.index}/{self.rtstream_index_id}/{ApiPath.alert}", - data={ - "event_id": event_id, - "callback_url": callback_url, - }, + data=data, ) return alert_data.get("alert_id", None) @@ -162,6 +289,7 @@ def __init__(self, _connection, id: str, **kwargs) -> None: self.created_at = kwargs.get("created_at", None) self.sample_rate = kwargs.get("sample_rate", None) self.status = kwargs.get("status", None) + self.channel_id = kwargs.get("channel_id", None) def __repr__(self) -> str: return ( @@ -198,6 +326,37 @@ def stop(self): ) self.status = "stopped" + def start_transcript( + self, ws_connection_id: Optional[str] = None, engine: str = 
"assemblyai" + ) -> dict: + """Start transcription for the rtstream. + + :param str ws_connection_id: WebSocket connection ID for real-time transcript updates (optional) + :param str engine: Transcription engine (default: "assemblyai") + :return: Transcription status with start time + :rtype: dict + """ + data = {"action": "start", "engine": engine} + if ws_connection_id: + data["ws_connection_id"] = ws_connection_id + + return self._connection.post( + f"{ApiPath.rtstream}/{self.id}/{ApiPath.transcription}", + data=data, + ) + + def stop_transcript(self, engine: str = "assemblyai") -> dict: + """Stop transcription for the rtstream. + + :param str engine: Transcription engine (default: "assemblyai") + :return: Transcription status with start and end time + :rtype: dict + """ + return self._connection.post( + f"{ApiPath.rtstream}/{self.id}/{ApiPath.transcription}", + data={"action": "stop", "engine": engine}, + ) + def generate_stream(self, start, end): """Generate a stream from the rtstream. @@ -220,6 +379,7 @@ def index_scenes( model_name=None, model_config={}, name=None, + ws_connection_id: Optional[str] = None, ): """Index scenes from the rtstream. 
@@ -229,19 +389,202 @@ def index_scenes( :param str model_name: Name of the model :param dict model_config: Configuration for the model :param str name: Name of the scene index + :param str ws_connection_id: WebSocket connection ID for real-time updates (optional) :return: Scene index, :class:`RTStreamSceneIndex ` object :rtype: :class:`videodb.rtstream.RTStreamSceneIndex` """ + data = { + "extraction_type": extraction_type, + "extraction_config": extraction_config, + "prompt": prompt, + "model_name": model_name, + "model_config": model_config, + "name": name, + } + if ws_connection_id: + data["ws_connection_id"] = ws_connection_id + index_data = self._connection.post( f"{ApiPath.rtstream}/{self.id}/{ApiPath.index}/{ApiPath.scene}", - data={ - "extraction_type": extraction_type, - "extraction_config": extraction_config, - "prompt": prompt, - "model_name": model_name, - "model_config": model_config, - "name": name, - }, + data=data, + ) + if not index_data: + return None + return RTStreamSceneIndex( + _connection=self._connection, + rtstream_index_id=index_data.get("rtstream_index_id"), + rtstream_id=self.id, + extraction_type=index_data.get("extraction_type"), + extraction_config=index_data.get("extraction_config"), + prompt=index_data.get("prompt"), + name=index_data.get("name"), + status=index_data.get("status"), + ) + + def index_spoken_words( + self, + prompt: str = None, + segmenter: str = Segmenter.word, + length: int = 10, + model_name: str = None, + model_config: dict = {}, + name: str = None, + ws_connection_id: Optional[str] = None, + ): + """Index spoken words from the rtstream transcript. 
+ + :param str prompt: Prompt for summarizing transcript segments + :param Segmenter segmenter: Segmentation type (:class:`Segmenter.word`, + :class:`Segmenter.sentence`, :class:`Segmenter.time`) + :param int length: Length of segments (words, sentences, or seconds based on segmenter) + :param str model_name: Name of the model + :param dict model_config: Configuration for the model + :param str name: Name of the spoken words index + :param str ws_connection_id: WebSocket connection ID for real-time updates (optional) + :return: Scene index, :class:`RTStreamSceneIndex ` object + :rtype: :class:`videodb.rtstream.RTStreamSceneIndex` + """ + extraction_config = { + "segmenter": segmenter, + "segmentation_value": length, + } + + data = { + "extraction_type": SceneExtractionType.transcript, + "extraction_config": extraction_config, + "prompt": prompt, + "model_name": model_name, + "model_config": model_config, + "name": name, + } + if ws_connection_id: + data["ws_connection_id"] = ws_connection_id + + index_data = self._connection.post( + f"{ApiPath.rtstream}/{self.id}/{ApiPath.index}/{ApiPath.scene}", + data=data, + ) + if not index_data: + return None + return RTStreamSceneIndex( + _connection=self._connection, + rtstream_index_id=index_data.get("rtstream_index_id"), + rtstream_id=self.id, + extraction_type=index_data.get("extraction_type"), + extraction_config=index_data.get("extraction_config"), + prompt=index_data.get("prompt"), + name=index_data.get("name"), + status=index_data.get("status"), + ) + + def index_audio( + self, + prompt: str = None, + batch_config: dict = None, + model_name: str = None, + model_config: dict = {}, + name: str = None, + ws_connection_id: Optional[str] = None, + ): + """Index audio from the rtstream transcript. 
+ + :param str prompt: Prompt for summarizing transcript segments + :param dict batch_config: Segmentation config with keys: + - "type": Segmentation type ("word", "sentence", or "time") + - "value": Segment length (words, sentences, or seconds) + :param str model_name: Name of the model + :param dict model_config: Configuration for the model + :param str name: Name of the audio index + :param str ws_connection_id: WebSocket connection ID for real-time updates (optional) + :return: Scene index, :class:`RTStreamSceneIndex ` object + :rtype: :class:`videodb.rtstream.RTStreamSceneIndex` + """ + # Default batch_config + if batch_config is None: + batch_config = {"type": "word", "value": 10} + + segmenter = batch_config.get("type", Segmenter.word) + segmentation_value = batch_config.get("value", 10) + + extraction_config = { + "segmenter": segmenter, + "segmentation_value": segmentation_value, + } + + data = { + "extraction_type": SceneExtractionType.transcript, + "extraction_config": extraction_config, + "prompt": prompt, + "model_name": model_name, + "model_config": model_config, + "name": name, + } + if ws_connection_id: + data["ws_connection_id"] = ws_connection_id + + index_data = self._connection.post( + f"{ApiPath.rtstream}/{self.id}/{ApiPath.index}/{ApiPath.scene}", + data=data, + ) + if not index_data: + return None + return RTStreamSceneIndex( + _connection=self._connection, + rtstream_index_id=index_data.get("rtstream_index_id"), + rtstream_id=self.id, + extraction_type=index_data.get("extraction_type"), + extraction_config=index_data.get("extraction_config"), + prompt=index_data.get("prompt"), + name=index_data.get("name"), + status=index_data.get("status"), + ) + + def index_visuals( + self, + prompt: str = None, + batch_config: dict = None, + model_name: str = None, + model_config: dict = {}, + name: str = None, + ws_connection_id: Optional[str] = None, + ): + """Index visuals (scenes) from the rtstream. 
+ + :param str prompt: Prompt for scene description + :param dict batch_config: Frame extraction config with keys: + - "type": Only "time" is supported + - "value": Window size in seconds + - "frame_count": Number of frames to extract per window + :param str model_name: Name of the model + :param dict model_config: Configuration for the model + :param str name: Name of the visual index + :param str ws_connection_id: WebSocket connection ID for real-time updates (optional) + :return: Scene index, :class:`RTStreamSceneIndex ` object + :rtype: :class:`videodb.rtstream.RTStreamSceneIndex` + """ + # Default batch_config + if batch_config is None: + batch_config = {"type": "time", "value": 2, "frame_count": 5} + + extraction_config = { + "time": batch_config.get("value", 2), + "frame_count": batch_config.get("frame_count", 5), + } + + data = { + "extraction_type": SceneExtractionType.time_based, + "extraction_config": extraction_config, + "prompt": prompt, + "model_name": model_name, + "model_config": model_config, + "name": name, + } + if ws_connection_id: + data["ws_connection_id"] = ws_connection_id + + index_data = self._connection.post( + f"{ApiPath.rtstream}/{self.id}/{ApiPath.index}/{ApiPath.scene}", + data=data, ) if not index_data: return None @@ -299,3 +642,100 @@ def get_scene_index(self, index_id: str) -> RTStreamSceneIndex: name=index_data.get("name"), status=index_data.get("status"), ) + + def get_transcript( + self, + page=1, + page_size=100, + start=None, + end=None, + since=None, + engine=None, + ): + """Get transcription data from the rtstream. 
+ + :param int page: Page number (default: 1) + :param int page_size: Items per page (default: 100, max: 1000) + :param float start: Start timestamp filter (optional) + :param float end: End timestamp filter (optional) + :param float since: For polling - only get transcriptions after this timestamp (optional) + :param str engine: Transcription engine (default: "AAIS") + :return: Transcription data with segments and metadata + :rtype: dict + """ + params = { + "engine": engine, + "page": page, + "page_size": page_size, + } + if start is not None: + params["start"] = start + if end is not None: + params["end"] = end + if since is not None: + params["since"] = since + + transcription_data = self._connection.get( + f"{ApiPath.rtstream}/{self.id}/{ApiPath.transcription}", + params=params, + ) + return transcription_data + + def search( + self, + query: str, + index_id: Optional[str] = None, + result_threshold: Optional[int] = None, + score_threshold: Optional[float] = None, + dynamic_score_percentage: Optional[float] = None, + filter: Optional[List[Dict[str, Any]]] = None, + ) -> RTStreamSearchResult: + """Search across scene index records for the rtstream. 
+ + :param str query: Query to search for + :param str index_id: Filter by specific scene index (optional) + :param int result_threshold: Number of results to return (optional) + :param float score_threshold: Minimum score threshold (optional) + :param float dynamic_score_percentage: Percentage of dynamic score to consider (optional) + :param list filter: Additional metadata filters (optional) + :return: :class:`RTStreamSearchResult ` object + :rtype: :class:`videodb.rtstream.RTStreamSearchResult` + """ + data = {"query": query} + + if index_id is not None: + data["scene_index_id"] = index_id + if result_threshold is not None: + data["result_threshold"] = result_threshold + if score_threshold is not None: + data["score_threshold"] = score_threshold + if dynamic_score_percentage is not None: + data["dynamic_score_percentage"] = dynamic_score_percentage + if filter is not None: + data["filter"] = filter + + search_data = self._connection.post( + f"{ApiPath.rtstream}/{self.id}/{ApiPath.search}", + data=data, + ) + + results = search_data.get("results", []) + shots = [ + RTStreamShot( + _connection=self._connection, + rtstream_id=self.id, + rtstream_name=self.name, + start=result.get("start"), + end=result.get("end"), + text=result.get("text"), + search_score=result.get("score"), + scene_index_id=result.get("scene_index_id"), + scene_index_name=result.get("scene_index_name"), + metadata=result.get("metadata"), + ) + for result in results + ] + return RTStreamSearchResult( + collection_id=self.collection_id, + shots=shots, + ) diff --git a/videodb/search.py b/videodb/search.py index ba557be..94730ec 100644 --- a/videodb/search.py +++ b/videodb/search.py @@ -45,6 +45,9 @@ def _format_results(self): doc.get("end"), doc.get("text"), doc.get("score"), + scene_index_id=doc.get("scene_index_id"), + scene_index_name=doc.get("scene_index_name"), + metadata=doc.get("metadata"), ) ) @@ -132,9 +135,11 @@ def search_inside_video( "index_type": index_type, "query": query, 
"score_threshold": score_threshold - or SemanticSearchDefaultValues.score_threshold, + if score_threshold is not None + else SemanticSearchDefaultValues.score_threshold, "result_threshold": result_threshold - or SemanticSearchDefaultValues.result_threshold, + if result_threshold is not None + else SemanticSearchDefaultValues.result_threshold, "dynamic_score_percentage": dynamic_score_percentage, **kwargs, }, @@ -159,9 +164,11 @@ def search_inside_collection( "index_type": index_type, "query": query, "score_threshold": score_threshold - or SemanticSearchDefaultValues.score_threshold, + if score_threshold is not None + else SemanticSearchDefaultValues.score_threshold, "result_threshold": result_threshold - or SemanticSearchDefaultValues.result_threshold, + if result_threshold is not None + else SemanticSearchDefaultValues.result_threshold, "dynamic_score_percentage": dynamic_score_percentage, **kwargs, }, diff --git a/videodb/shot.py b/videodb/shot.py index c2fadcb..b261077 100644 --- a/videodb/shot.py +++ b/videodb/shot.py @@ -1,5 +1,3 @@ - - from typing import Optional from videodb._utils._video import play_stream from videodb._constants import ( @@ -19,6 +17,9 @@ class Shot: :ivar int search_score: Search relevance score :ivar str stream_url: URL to stream the shot :ivar str player_url: URL to play the shot in a player + :ivar Optional[str] scene_index_id: ID of the scene index for scene search results + :ivar Optional[str] scene_index_name: Name of the scene index for scene search results + :ivar Optional[dict] metadata: Additional metadata for the shot """ def __init__( @@ -31,6 +32,9 @@ def __init__( end: float, text: Optional[str] = None, search_score: Optional[int] = None, + scene_index_id: Optional[str] = None, + scene_index_name: Optional[str] = None, + metadata: Optional[dict] = None, ) -> None: self._connection = _connection self.video_id = video_id @@ -40,21 +44,33 @@ def __init__( self.end = end self.text = text self.search_score = search_score + 
self.scene_index_id = scene_index_id + self.scene_index_name = scene_index_name + self.metadata = metadata self.stream_url = None self.player_url = None def __repr__(self) -> str: - return ( + repr_str = ( f"Shot(" f"video_id={self.video_id}, " f"video_title={self.video_title}, " f"start={self.start}, " f"end={self.end}, " f"text={self.text}, " - f"search_score={self.search_score}, " - f"stream_url={self.stream_url}, " - f"player_url={self.player_url})" + f"search_score={self.search_score}" ) + if self.scene_index_id: + repr_str += f", scene_index_id={self.scene_index_id}" + + if self.scene_index_name: + repr_str += f", scene_index_name={self.scene_index_name}" + + if self.metadata: + repr_str += f", metadata={self.metadata}" + + repr_str += f", stream_url={self.stream_url}, player_url={self.player_url})" + return repr_str def __getitem__(self, key): """Get an item from the shot object""" diff --git a/videodb/video.py b/videodb/video.py index 34af19d..9dd1edb 100644 --- a/videodb/video.py +++ b/videodb/video.py @@ -7,6 +7,7 @@ SceneExtractionType, SearchType, Segmenter, + SegmentationType, SubtitleStyle, Workflows, ) @@ -297,12 +298,14 @@ def translate_transcript( def index_spoken_words( self, language_code: Optional[str] = None, + segmentation_type: Optional[SegmentationType] = SegmentationType.sentence, force: bool = False, callback_url: str = None, ) -> None: """Semantic indexing of spoken words in the video. 
:param str language_code: (optional) Language code of the video + :param SegmentationType segmentation_type: (optional) Segmentation type used for indexing, :class:`SegmentationType ` object :param bool force: (optional) Force to index the video :param str callback_url: (optional) URL to receive the callback :raises InvalidRequestError: If the video is already indexed @@ -314,6 +317,7 @@ def index_spoken_words( data={ "index_type": IndexType.spoken_word, "language_code": language_code, + "segmentation_type": segmentation_type, "force": force, "callback_url": callback_url, }, @@ -582,6 +586,30 @@ def add_subtitle(self, style: SubtitleStyle = SubtitleStyle()) -> str: ) return subtitle_data.get("stream_url", None) + def clip( + self, + prompt: str, + content_type: str, + model_name: str, + ) -> str: + """Generate a clip from the video using a prompt. + :param str prompt: Prompt to generate the clip + :param str content_type: Content type for the clip + :param str model_name: Model name for generation + :return: The stream url of the generated clip + :rtype: str + """ + + clip_data = self._connection.post( + path=f"{ApiPath.video}/{self.id}/{ApiPath.clip}", + data={ + "prompt": prompt, + "content_type": content_type, + "model_name": model_name, + }, + ) + return SearchResult(self._connection, **clip_data) + def insert_video(self, video, timestamp: float) -> str: """Insert a video into another video diff --git a/videodb/websocket_client.py b/videodb/websocket_client.py new file mode 100644 index 0000000..462c0e4 --- /dev/null +++ b/videodb/websocket_client.py @@ -0,0 +1,72 @@ +import json +import logging +from typing import AsyncGenerator + +# Deferred import to avoid hard dependency at module level if installed without extra +try: + import websockets +except ImportError: + websockets = None + +logger = logging.getLogger(__name__) + +class WebSocketConnection: + """Class representing a persistent WebSocket connection for receiving events.""" + + def __init__(self, url: 
str) -> None: + if websockets is None: + raise ImportError( + "The 'websockets' library is required for WebSocket support. " + "Please install it using 'pip install videodb[websockets]' or 'pip install websockets'." + ) + self.url = url + self._connection = None + self.connection_id = None + + async def connect(self) -> "WebSocketConnection": + """Establish the WebSocket connection.""" + logger.debug(f"Connecting to WebSocket URL: {self.url}") + self._connection = await websockets.connect(self.url) + + # Expect the first message to be the connection init containing the ID + try: + init_msg = await self._connection.recv() + data = json.loads(init_msg) + self.connection_id = data.get("connection_id") + logger.info(f"WebSocket connected with ID: {self.connection_id}") + except Exception as e: + logger.error(f"Failed to receive initialization message: {e}") + await self.close() + raise e + + return self + + async def close(self) -> None: + """Close the WebSocket connection.""" + if self._connection: + await self._connection.close() + self._connection = None + + async def receive(self) -> AsyncGenerator[dict, None]: + """Async generator that yields received messages.""" + if not self._connection: + raise ConnectionError("WebSocket is not connected. Call connect() first.") + + async for message in self._connection: + try: + yield json.loads(message) + except json.JSONDecodeError: + logger.warning(f"Received non-JSON message: {message}") + yield {"raw": message} + + async def send(self, message: dict) -> None: + """Send a message over the WebSocket.""" + if not self._connection: + raise ConnectionError("WebSocket is not connected.") + await self._connection.send(json.dumps(message)) + + async def __aenter__(self): + return await self.connect() + + async def __aexit__(self, exc_type, exc_value, traceback): + await self.close()