From 155e2a630b60374cce783e756cb68dfe8fb5bfa5 Mon Sep 17 00:00:00 2001
From: voluntas <nakai@shiguredo.jp>
Date: Tue, 27 Jan 2026 00:37:06 +0900
Subject: [PATCH 01/12] =?UTF-8?q?=E3=82=B9=E3=82=B1=E3=83=BC=E3=83=AA?=
 =?UTF-8?q?=E3=83=B3=E3=82=B0=E3=83=86=E3=82=B9=E3=83=88=E3=81=A7=E5=85=A8?=
 =?UTF-8?q?=E3=83=94=E3=82=AF=E3=82=BB=E3=83=AB=E3=83=95=E3=82=A9=E3=83=BC?=
 =?UTF-8?q?=E3=83=9E=E3=83=83=E3=83=88=E3=82=92=E3=83=86=E3=82=B9=E3=83=88?=
 =?UTF-8?q?=E3=81=99=E3=82=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- pytest.mark.parametrize を使用してテストをリファクタリング
- I420, I422, I444, NV12, RGBA, BGRA, RGB, BGR の全フォーマットをテスト
- テスト数: 8 → 64 (8 フォーマット × 8 テスト関数)
---
 tests/test_encoder_scaling.py | 152 +++++++++++++++++-----------------
 1 file changed, 74 insertions(+), 78 deletions(-)

diff --git a/tests/test_encoder_scaling.py b/tests/test_encoder_scaling.py
index 3234861..55d1204 100644
--- a/tests/test_encoder_scaling.py
+++ b/tests/test_encoder_scaling.py
@@ -14,7 +14,6 @@
 import pytest
 
 from webcodecs import (
-    CodecState,
     EncodedVideoChunkType,
     LatencyMode,
     VideoDecoder,
@@ -27,12 +26,32 @@
 )
 
 
-def _make_test_frame(width: int, height: int, frame_num: int = 0) -> VideoFrame:
+def _calculate_frame_data_size(width: int, height: int, pixel_format: VideoPixelFormat) -> int:
+    """ピクセルフォーマットに応じたデータサイズを計算する."""
+    match pixel_format:
+        case VideoPixelFormat.I420 | VideoPixelFormat.NV12:
+            return width * height * 3 // 2
+        case VideoPixelFormat.I422:
+            return width * height * 2
+        case VideoPixelFormat.I444 | VideoPixelFormat.RGB | VideoPixelFormat.BGR:
+            return width * height * 3
+        case VideoPixelFormat.RGBA | VideoPixelFormat.BGRA:
+            return width * height * 4
+        case _:
+            raise ValueError(f"Unsupported pixel format: {pixel_format}")
+
+
+def _make_test_frame(
+    width: int,
+    height: int,
+    frame_num: int = 0,
+    pixel_format: VideoPixelFormat = VideoPixelFormat.I420,
+) -> VideoFrame:
     """テスト用の VideoFrame を作成する."""
-    data_size = width * height * 3 // 2  # I420
+    data_size = _calculate_frame_data_size(width, height, pixel_format)
     data = np.zeros(data_size, dtype=np.uint8)
     init: VideoFrameBufferInit = {
-        "format": VideoPixelFormat.I420,
+        "format": pixel_format,
         "coded_width": width,
         "coded_height": height,
         "timestamp": frame_num * 1000,
@@ -41,16 +60,28 @@ def _make_test_frame(width: int, height: int, frame_num: int = 0) -> VideoFrame:
     return frame
 
 
+# テスト対象のピクセルフォーマット
+PIXEL_FORMATS = [
+    VideoPixelFormat.I420,
+    VideoPixelFormat.I422,
+    VideoPixelFormat.I444,
+    VideoPixelFormat.NV12,
+    VideoPixelFormat.RGBA,
+    VideoPixelFormat.BGRA,
+    VideoPixelFormat.RGB,
+    VideoPixelFormat.BGR,
+]
+
+
 # =============================================================================
 # AV1 スケーリングテスト
 # =============================================================================
 
 
-def test_av1_encode_with_scaling():
-    """AV1 エンコーダのスケーリング機能テスト."""
-    # configure: 320x240 (出力解像度)
+@pytest.mark.parametrize("pixel_format", PIXEL_FORMATS)
+def test_av1_encode_with_scaling(pixel_format: VideoPixelFormat):
+    """AV1 エンコーダのスケーリング機能テスト (各ピクセルフォーマット)."""
     output_width, output_height = 320, 240
-    # encode: 640x480 のフレーム (入力解像度)
     input_width, input_height = 640, 480
 
     encoded_chunks = []
@@ -73,18 +104,15 @@ def on_error(error):
     }
     encoder.configure(config)
 
-    # 入力解像度のフレームを作成
-    frame = _make_test_frame(input_width, input_height, 0)
+    frame = _make_test_frame(input_width, input_height, 0, pixel_format)
     encoder.encode(frame, {"key_frame": True})
     encoder.flush()
     frame.close()
 
-    # エンコードが成功していることを確認
     assert len(encoded_chunks) >= 1
     assert encoded_chunks[0].byte_length > 0
     assert encoded_chunks[0].type == EncodedVideoChunkType.KEY
 
-    # デコードして出力解像度を確認
     decoded_frames = []
 
     def on_decode_output(frame):
@@ -102,23 +130,19 @@ def on_decode_error(error):
         decoder.decode(chunk)
     decoder.flush()
 
-    # デコードされたフレームが出力解像度になっていることを確認
     assert len(decoded_frames) >= 1
     for frame in decoded_frames:
-        assert frame.coded_width == output_width, (
-            f"出力幅が期待値と異なる: 期待値 {output_width}, 実際 {frame.coded_width}"
-        )
-        assert frame.coded_height == output_height, (
-            f"出力高さが期待値と異なる: 期待値 {output_height}, 実際 {frame.coded_height}"
-        )
+        assert frame.coded_width == output_width
+        assert frame.coded_height == output_height
         frame.close()
 
     encoder.close()
     decoder.close()
 
 
-def test_av1_encode_scaling_same_resolution():
-    """AV1 configure と同じ解像度のフレームはスケーリングなしでエンコードされることを確認."""
+@pytest.mark.parametrize("pixel_format", PIXEL_FORMATS)
+def test_av1_encode_scaling_same_resolution(pixel_format: VideoPixelFormat):
+    """AV1 configure と同じ解像度のフレームはスケーリングなしでエンコード (各ピクセルフォーマット)."""
     width, height = 320, 240
 
     encoded_chunks = []
@@ -141,24 +165,21 @@ def on_error(error):
     }
     encoder.configure(config)
 
-    # 同じ解像度のフレーム
-    frame = _make_test_frame(width, height, 0)
+    frame = _make_test_frame(width, height, 0, pixel_format)
     encoder.encode(frame, {"key_frame": True})
     encoder.flush()
     frame.close()
 
-    # エンコードが成功していることを確認
     assert len(encoded_chunks) >= 1
     assert encoded_chunks[0].byte_length > 0
 
     encoder.close()
 
 
-def test_av1_encode_scaling_multiple_frames():
-    """AV1 複数フレームでのスケーリングテスト."""
-    # configure: 320x240 (出力解像度)
+@pytest.mark.parametrize("pixel_format", PIXEL_FORMATS)
+def test_av1_encode_scaling_multiple_frames(pixel_format: VideoPixelFormat):
+    """AV1 複数フレームでのスケーリングテスト (各ピクセルフォーマット)."""
     output_width, output_height = 320, 240
-    # encode: 640x480 のフレーム (入力解像度)
     input_width, input_height = 640, 480
     num_frames = 3
 
@@ -182,15 +203,13 @@ def on_error(error):
     }
     encoder.configure(config)
 
-    # 入力解像度のフレームを複数作成・エンコード
     for i in range(num_frames):
-        frame = _make_test_frame(input_width, input_height, i)
+        frame = _make_test_frame(input_width, input_height, i, pixel_format)
         encoder.encode(frame, {"key_frame": i == 0})
         frame.close()
 
     encoder.flush()
 
-    # エンコードが成功していることを確認
     assert len(encoded_chunks) >= num_frames
 
     encoder.close()
@@ -205,11 +224,10 @@ def on_error(error):
     platform.system() not in ("Darwin", "Linux"),
     reason="VP8 は macOS / Linux のみサポート",
 )
-def test_vp8_encode_with_scaling():
-    """VP8 エンコーダのスケーリング機能テスト."""
-    # configure: 320x240 (出力解像度)
+@pytest.mark.parametrize("pixel_format", PIXEL_FORMATS)
+def test_vp8_encode_with_scaling(pixel_format: VideoPixelFormat):
+    """VP8 エンコーダのスケーリング機能テスト (各ピクセルフォーマット)."""
     output_width, output_height = 320, 240
-    # encode: 640x480 のフレーム (入力解像度)
     input_width, input_height = 640, 480
 
     encoded_chunks = []
@@ -232,18 +250,15 @@ def on_error(error):
     }
     encoder.configure(config)
 
-    # 入力解像度のフレームを作成
-    frame = _make_test_frame(input_width, input_height, 0)
+    frame = _make_test_frame(input_width, input_height, 0, pixel_format)
     encoder.encode(frame, {"key_frame": True})
     encoder.flush()
     frame.close()
 
-    # エンコードが成功していることを確認
     assert len(encoded_chunks) >= 1
     assert encoded_chunks[0].byte_length > 0
     assert encoded_chunks[0].type == EncodedVideoChunkType.KEY
 
-    # デコードして出力解像度を確認
     decoded_frames = []
 
     def on_decode_output(frame):
@@ -261,15 +276,10 @@ def on_decode_error(error):
         decoder.decode(chunk)
     decoder.flush()
 
-    # デコードされたフレームが出力解像度になっていることを確認
     assert len(decoded_frames) >= 1
     for frame in decoded_frames:
-        assert frame.coded_width == output_width, (
-            f"出力幅が期待値と異なる: 期待値 {output_width}, 実際 {frame.coded_width}"
-        )
-        assert frame.coded_height == output_height, (
-            f"出力高さが期待値と異なる: 期待値 {output_height}, 実際 {frame.coded_height}"
-        )
+        assert frame.coded_width == output_width
+        assert frame.coded_height == output_height
         frame.close()
 
     encoder.close()
@@ -280,8 +290,9 @@ def on_decode_error(error):
     platform.system() not in ("Darwin", "Linux"),
     reason="VP8 は macOS / Linux のみサポート",
 )
-def test_vp8_encode_scaling_same_resolution():
-    """VP8 configure と同じ解像度のフレームはスケーリングなしでエンコードされることを確認."""
+@pytest.mark.parametrize("pixel_format", PIXEL_FORMATS)
+def test_vp8_encode_scaling_same_resolution(pixel_format: VideoPixelFormat):
+    """VP8 configure と同じ解像度のフレームはスケーリングなしでエンコード (各ピクセルフォーマット)."""
     width, height = 320, 240
 
     encoded_chunks = []
@@ -304,13 +315,11 @@ def on_error(error):
     }
     encoder.configure(config)
 
-    # 同じ解像度のフレーム
-    frame = _make_test_frame(width, height, 0)
+    frame = _make_test_frame(width, height, 0, pixel_format)
     encoder.encode(frame, {"key_frame": True})
     encoder.flush()
     frame.close()
 
-    # エンコードが成功していることを確認
     assert len(encoded_chunks) >= 1
     assert encoded_chunks[0].byte_length > 0
 
@@ -326,11 +335,10 @@ def on_error(error):
     platform.system() not in ("Darwin", "Linux"),
     reason="VP9 は macOS / Linux のみサポート",
 )
-def test_vp9_encode_with_scaling():
-    """VP9 エンコーダのスケーリング機能テスト."""
-    # configure: 320x240 (出力解像度)
+@pytest.mark.parametrize("pixel_format", PIXEL_FORMATS)
+def test_vp9_encode_with_scaling(pixel_format: VideoPixelFormat):
+    """VP9 エンコーダのスケーリング機能テスト (各ピクセルフォーマット)."""
     output_width, output_height = 320, 240
-    # encode: 640x480 のフレーム (入力解像度)
     input_width, input_height = 640, 480
 
     encoded_chunks = []
@@ -353,18 +361,15 @@ def on_error(error):
     }
     encoder.configure(config)
 
-    # 入力解像度のフレームを作成
-    frame = _make_test_frame(input_width, input_height, 0)
+    frame = _make_test_frame(input_width, input_height, 0, pixel_format)
     encoder.encode(frame, {"key_frame": True})
     encoder.flush()
     frame.close()
 
-    # エンコードが成功していることを確認
     assert len(encoded_chunks) >= 1
     assert encoded_chunks[0].byte_length > 0
     assert encoded_chunks[0].type == EncodedVideoChunkType.KEY
 
-    # デコードして出力解像度を確認
     decoded_frames = []
 
     def on_decode_output(frame):
@@ -382,15 +387,10 @@ def on_decode_error(error):
         decoder.decode(chunk)
     decoder.flush()
 
-    # デコードされたフレームが出力解像度になっていることを確認
     assert len(decoded_frames) >= 1
     for frame in decoded_frames:
-        assert frame.coded_width == output_width, (
-            f"出力幅が期待値と異なる: 期待値 {output_width}, 実際 {frame.coded_width}"
-        )
-        assert frame.coded_height == output_height, (
-            f"出力高さが期待値と異なる: 期待値 {output_height}, 実際 {frame.coded_height}"
-        )
+        assert frame.coded_width == output_width
+        assert frame.coded_height == output_height
         frame.close()
 
     encoder.close()
@@ -401,8 +401,9 @@ def on_decode_error(error):
     platform.system() not in ("Darwin", "Linux"),
     reason="VP9 は macOS / Linux のみサポート",
 )
-def test_vp9_encode_scaling_same_resolution():
-    """VP9 configure と同じ解像度のフレームはスケーリングなしでエンコードされることを確認."""
+@pytest.mark.parametrize("pixel_format", PIXEL_FORMATS)
+def test_vp9_encode_scaling_same_resolution(pixel_format: VideoPixelFormat):
+    """VP9 configure と同じ解像度のフレームはスケーリングなしでエンコード (各ピクセルフォーマット)."""
     width, height = 320, 240
 
     encoded_chunks = []
@@ -425,13 +426,11 @@ def on_error(error):
     }
     encoder.configure(config)
 
-    # 同じ解像度のフレーム
-    frame = _make_test_frame(width, height, 0)
+    frame = _make_test_frame(width, height, 0, pixel_format)
     encoder.encode(frame, {"key_frame": True})
     encoder.flush()
     frame.close()
 
-    # エンコードが成功していることを確認
     assert len(encoded_chunks) >= 1
     assert encoded_chunks[0].byte_length > 0
 
@@ -442,11 +441,10 @@ def on_error(error):
     platform.system() not in ("Darwin", "Linux"),
     reason="VP9 は macOS / Linux のみサポート",
 )
-def test_vp9_encode_scaling_multiple_frames():
-    """VP9 複数フレームでのスケーリングテスト."""
-    # configure: 320x240 (出力解像度)
+@pytest.mark.parametrize("pixel_format", PIXEL_FORMATS)
+def test_vp9_encode_scaling_multiple_frames(pixel_format: VideoPixelFormat):
+    """VP9 複数フレームでのスケーリングテスト (各ピクセルフォーマット)."""
     output_width, output_height = 320, 240
-    # encode: 640x480 のフレーム (入力解像度)
     input_width, input_height = 640, 480
     num_frames = 3
 
@@ -470,15 +468,13 @@ def on_error(error):
     }
     encoder.configure(config)
 
-    # 入力解像度のフレームを複数作成・エンコード
     for i in range(num_frames):
-        frame = _make_test_frame(input_width, input_height, i)
+        frame = _make_test_frame(input_width, input_height, i, pixel_format)
         encoder.encode(frame, {"key_frame": i == 0})
         frame.close()
 
     encoder.flush()
 
-    # エンコードが成功していることを確認
     assert len(encoded_chunks) >= num_frames
 
     encoder.close()

From e2d85e2d45cc4d477be7d2b1b8de6ed1529fd646 Mon Sep 17 00:00:00 2001
From: voluntas <nakai@shiguredo.jp>
Date: Tue, 27 Jan 2026 01:49:09 +0900
Subject: [PATCH 02/12] =?UTF-8?q?CHANGES.md=20=E3=81=AB=E3=82=B9=E3=82=B1?=
 =?UTF-8?q?=E3=83=BC=E3=83=AA=E3=83=B3=E3=82=B0=E6=A9=9F=E8=83=BD=E3=81=AE?=
 =?UTF-8?q?=E8=A9=B3=E7=B4=B0=E3=82=92=E8=BF=BD=E8=A8=98=E3=81=99=E3=82=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- 対応ピクセルフォーマット一覧を追加
- Apple Video Toolbox の制限を明記 (I420, NV12, BGRA のみ直接対応)
- libyuv による変換処理の説明を更新
---
 CHANGES.md | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/CHANGES.md b/CHANGES.md
index 06417c4..3d1c357 100644
--- a/CHANGES.md
+++ b/CHANGES.md
@@ -13,9 +13,10 @@
 
 - [ADD] VideoEncoder にスケーリング機能を追加する
   - WebCodecs API 仕様に準拠: encode で渡されるフレームの解像度と configure で指定した解像度が異なる場合に自動的にスケーリング
-  - Apple Video Toolbox: VTPixelTransferSession を使用 (Metal ベースの HW アクセラレーション)
-  - ソフトウェアエンコーダー (AV1/VP8/VP9): libyuv の I420Scale を使用
-  - NVENC / Intel VPL: libyuv の I420Scale を使用
+  - 対応ピクセルフォーマット: I420, I422, I444, NV12, RGBA, BGRA, RGB, BGR
+  - Apple Video Toolbox: VTPixelTransferSession を使用 (I420, NV12, BGRA のみ直接対応、他は NV12 に変換)
+  - ソフトウェアエンコーダー (AV1/VP8/VP9): libyuv を使用してフォーマット変換とスケーリング
+  - NVENC / Intel VPL: libyuv を使用してフォーマット変換とスケーリング
   - @voluntas
 - [ADD] VP9 で scalabilityMode (L1T2/L1T3) をサポートする
   - VideoEncoderConfig で `scalability_mode` を指定可能

From 06d8c32ac71bb9f99669016211017def01c3a349 Mon Sep 17 00:00:00 2001
From: voluntas <nakai@shiguredo.jp>
Date: Tue, 27 Jan 2026 08:40:30 +0900
Subject: [PATCH 03/12] =?UTF-8?q?=E3=82=A8=E3=83=B3=E3=82=B3=E3=83=BC?=
 =?UTF-8?q?=E3=83=80=E3=81=AE=E3=82=B9=E3=82=B1=E3=83=BC=E3=83=AA=E3=83=B3?=
 =?UTF-8?q?=E3=82=B0=E3=82=B3=E3=83=BC=E3=83=89=E3=82=92=20video=5Fscaler?=
 =?UTF-8?q?=20=E3=81=AB=E5=85=B1=E9=80=9A=E5=8C=96=E3=81=99=E3=82=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- video_scaler.h/cpp を追加
  - scale_to_i420(): AOM/VPX エンコーダ向け
  - scale_to_nv12(): NVENC/Intel VPL エンコーダ向け
- 各エンコーダから重複コードを削除してヘルパーを使用
- 対応フォーマット: I420, I422, I444, NV12, RGBA, BGRA, RGB, BGR
---
 docs/PYTHON_INTERFACE.md                      |  14 +-
 src/bindings/video_encoder.cpp                |   1 +
 src/bindings/video_encoder_aom.cpp            |  58 +-
 .../video_encoder_apple_video_toolbox.cpp     | 151 +++-
 src/bindings/video_encoder_intel_vpl.cpp      |  80 +-
 src/bindings/video_encoder_nvidia.cpp         |  80 +-
 src/bindings/video_encoder_vpx.cpp            |  57 +-
 src/bindings/video_scaler.cpp                 | 776 ++++++++++++++++++
 src/bindings/video_scaler.h                   |  65 ++
 9 files changed, 1001 insertions(+), 281 deletions(-)
 create mode 100644 src/bindings/video_scaler.cpp
 create mode 100644 src/bindings/video_scaler.h

diff --git a/docs/PYTHON_INTERFACE.md b/docs/PYTHON_INTERFACE.md
index 6aa8171..eaf6429 100644
--- a/docs/PYTHON_INTERFACE.md
+++ b/docs/PYTHON_INTERFACE.md
@@ -949,18 +949,20 @@ encoder.close()
 
 **スケーリング実装の詳細**:
 
-| エンコーダー | スケーリング方式 | 備考 |
-|------------|----------------|------|
-| Apple Video Toolbox (H.264/HEVC) | VTPixelTransferSession | Metal ベースの HW アクセラレーション |
-| ソフトウェアエンコーダー (AV1/VP8/VP9) | libyuv I420Scale | kFilterBox 補間 |
-| NVIDIA Video Codec SDK (NVENC) | libyuv I420Scale | NV12→I420→スケーリング→NV12 |
-| Intel VPL | libyuv I420Scale | NV12→I420→スケーリング→NV12 |
+| エンコーダー | スケーリング方式 | 対応フォーマット |
+|------------|----------------|----------------|
+| Apple Video Toolbox (H.264/HEVC) | VTPixelTransferSession (HWA) | I420, NV12, BGRA (他のフォーマットは NV12 に変換して入力) |
+| ソフトウェアエンコーダー (AV1/VP8/VP9) | libyuv (各フォーマット対応) | I420, I422, I444, NV12, RGBA, BGRA, RGB, BGR |
+| NVIDIA Video Codec SDK (NVENC) | libyuv (各フォーマット対応) | I420, I422, I444, NV12, RGBA, BGRA, RGB, BGR |
+| Intel VPL | libyuv (各フォーマット対応) | I420, I422, I444, NV12, RGBA, BGRA, RGB, BGR |
 
 **注意事項**:
 
 - スケーリングはダウンスケール、アップスケールの両方に対応
 - アスペクト比は `configure()` で指定した解像度に合わせられる（引き伸ばし）
 - 同じ解像度のフレームはスケーリング処理をスキップ
+- 入力フォーマットに応じた libyuv スケーラーが使用される (I420Scale, I422Scale, I444Scale, NV12Scale, ARGBScale)
+- RGB/BGR フォーマットは I420/NV12 に変換後スケーリング (libyuv に RGBScale がないため)
 
 ## 独自インターフェース
 
diff --git a/src/bindings/video_encoder.cpp b/src/bindings/video_encoder.cpp
index 2e77158..2e33886 100644
--- a/src/bindings/video_encoder.cpp
+++ b/src/bindings/video_encoder.cpp
@@ -260,6 +260,7 @@ static ScalabilityModeConfig parse_scalability_mode(const std::string& mode) {
 #include "video_encoder_aom.cpp"
 #include "video_encoder_apple_video_toolbox.cpp"
 #include "video_encoder_nvidia.cpp"
+#include "video_scaler.cpp"
 #if defined(__APPLE__) || defined(__linux__)
 #include "video_encoder_vpx.cpp"
 #endif
diff --git a/src/bindings/video_encoder_aom.cpp b/src/bindings/video_encoder_aom.cpp
index e72e7fe..58b105c 100644
--- a/src/bindings/video_encoder_aom.cpp
+++ b/src/bindings/video_encoder_aom.cpp
@@ -3,9 +3,8 @@
 #include <cstring>
 #include <thread>
 
-#include <libyuv.h>
-
 #include "video_encoder.h"
+#include "video_scaler.h"
 
 // WebRTC の NumberOfThreads ロジックに準拠
 // タイル数（1, 2, 4, 8）に合わせてスレッド数を決定
@@ -363,53 +362,16 @@ void VideoEncoder::encode_frame_aom(const VideoFrame& frame,
     svc_metadata = SvcOutputMetadata(temporal_layer_id);
   }
 
-  // スケーリングが必要かどうかを判定
-  bool needs_scaling =
-      (frame.width() != config_.width || frame.height() != config_.height);
-
-  // スケーリング用のバッファ
-  std::vector<uint8_t> scaled_buffer;
-  const uint8_t* src_y = frame.plane_ptr(0);
-  const uint8_t* src_u = frame.plane_ptr(1);
-  const uint8_t* src_v = frame.plane_ptr(2);
-  int src_stride_y = static_cast<int>(frame.width());
-  int src_stride_u = static_cast<int>(frame.width() / 2);
-  int src_stride_v = static_cast<int>(frame.width() / 2);
-
-  // スケーリングが必要な場合は libyuv で変換
-  if (needs_scaling) {
-    uint32_t dst_width = config_.width;
-    uint32_t dst_height = config_.height;
-    size_t y_size = dst_width * dst_height;
-    size_t uv_size = (dst_width / 2) * (dst_height / 2);
-    scaled_buffer.resize(y_size + uv_size * 2);
-
-    uint8_t* dst_y = scaled_buffer.data();
-    uint8_t* dst_u = dst_y + y_size;
-    uint8_t* dst_v = dst_u + uv_size;
-    int dst_stride_y = static_cast<int>(dst_width);
-    int dst_stride_u = static_cast<int>(dst_width / 2);
-    int dst_stride_v = static_cast<int>(dst_width / 2);
-
-    int result = libyuv::I420Scale(
-        src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
-        static_cast<int>(frame.width()), static_cast<int>(frame.height()),
-        dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v,
-        static_cast<int>(dst_width), static_cast<int>(dst_height),
-        libyuv::kFilterBox);
-
-    if (result != 0) {
-      throw std::runtime_error("libyuv::I420Scale failed");
-    }
+  // スケーリングと I420 変換
+  auto scaled =
+      video_scaler::scale_to_i420(frame, config_.width, config_.height);
 
-    // スケーリング後のポインタとストライドを更新
-    src_y = dst_y;
-    src_u = dst_u;
-    src_v = dst_v;
-    src_stride_y = dst_stride_y;
-    src_stride_u = dst_stride_u;
-    src_stride_v = dst_stride_v;
-  }
+  const uint8_t* src_y = scaled.y;
+  const uint8_t* src_u = scaled.u;
+  const uint8_t* src_v = scaled.v;
+  int src_stride_y = scaled.stride_y;
+  int src_stride_u = scaled.stride_u;
+  int src_stride_v = scaled.stride_v;
 
   // Wrap I420 memory from VideoFrame or scaled buffer
   aom_image_t img;
diff --git a/src/bindings/video_encoder_apple_video_toolbox.cpp b/src/bindings/video_encoder_apple_video_toolbox.cpp
index b4e3385..ad6495b 100644
--- a/src/bindings/video_encoder_apple_video_toolbox.cpp
+++ b/src/bindings/video_encoder_apple_video_toolbox.cpp
@@ -446,15 +446,41 @@ void VideoEncoder::encode_frame_videotoolbox(
 
   // native_buffer がない場合は CVPixelBuffer を作成してコピー
   if (!pb_from_native) {
-    // Make sure we have NV12 source
-    std::unique_ptr<VideoFrame> nv12;
-    if (frame.format() != VideoPixelFormat::NV12) {
-      nv12 = frame.convert_format(VideoPixelFormat::NV12);
+    // スケーリング時は VTPixelTransferSession でフォーマット変換とスケーリングを同時に行う
+    // VTPixelTransferSession がサポートするフォーマット: I420, NV12, BGRA
+    // スケーリングなしの場合は NV12 に変換が必要
+    bool use_native_format =
+        needs_scaling && (frame.format() == VideoPixelFormat::I420 ||
+                          frame.format() == VideoPixelFormat::NV12 ||
+                          frame.format() == VideoPixelFormat::BGRA);
+
+    // 入力フレームを変換するかどうかを決定
+    std::unique_ptr<VideoFrame> converted;
+    const VideoFrame* src_frame = &frame;
+
+    if (!use_native_format && frame.format() != VideoPixelFormat::NV12) {
+      // VTPixelTransferSession がサポートしないフォーマット、またはスケーリングなしの場合
+      // NV12 に変換
+      converted = frame.convert_format(VideoPixelFormat::NV12);
+      src_frame = converted.get();
+    }
+
+    // CVPixelBuffer のピクセルフォーマットを決定
+    OSType pixel_format;
+    switch (src_frame->format()) {
+      case VideoPixelFormat::I420:
+        pixel_format = kCVPixelFormatType_420YpCbCr8Planar;
+        break;
+      case VideoPixelFormat::BGRA:
+        pixel_format = kCVPixelFormatType_32BGRA;
+        break;
+      case VideoPixelFormat::NV12:
+      default:
+        pixel_format = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange;
+        break;
     }
-    const VideoFrame& src = nv12 ? *nv12 : frame;
 
     // 入力フレームサイズの CVPixelBuffer を作成
-    OSType pixel_format = kCVPixelFormatType_420YpCbCr8BiPlanarFullRange;
     CFDictionaryRef empty_dict = CFDictionaryCreate(
         kCFAllocatorDefault, nullptr, nullptr, 0,
         &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
@@ -464,8 +490,9 @@ void VideoEncoder::encode_frame_videotoolbox(
         kCFAllocatorDefault, pb_keys, pb_vals, 1,
         &kCFTypeDictionaryKeyCallBacks, &kCFTypeDictionaryValueCallBacks);
 
-    CVReturn r = CVPixelBufferCreate(kCFAllocatorDefault, src.width(),
-                                     src.height(), pixel_format, pb_attrs, &pb);
+    CVReturn r =
+        CVPixelBufferCreate(kCFAllocatorDefault, src_frame->width(),
+                            src_frame->height(), pixel_format, pb_attrs, &pb);
 
     CFRelease(pb_attrs);
     CFRelease(empty_dict);
@@ -474,37 +501,88 @@ void VideoEncoder::encode_frame_videotoolbox(
       throw std::runtime_error("Failed to create CVPixelBuffer for input");
     }
 
-    // Copy planes into CVPixelBuffer
+    // フォーマットに応じてデータをコピー
     CVPixelBufferLockBaseAddress(pb, 0);
-    uint8_t* dst_y = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(pb, 0);
-    size_t dst_stride_y = CVPixelBufferGetBytesPerRowOfPlane(pb, 0);
-    uint8_t* dst_uv = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(pb, 1);
-    size_t dst_stride_uv = CVPixelBufferGetBytesPerRowOfPlane(pb, 1);
-
-    const uint8_t* src_y = src.plane_ptr(0);
-    const uint8_t* src_uv = src.plane_ptr(1);
-    int width = static_cast<int>(src.width());
-    int height = static_cast<int>(src.height());
-    int chroma_height = (height + 1) / 2;
-    // Y plane
-    if (dst_stride_y == static_cast<size_t>(width)) {
-      memcpy(dst_y, src_y, static_cast<size_t>(width * height));
-    } else {
-      for (int i = 0; i < height; ++i) {
-        memcpy(dst_y + i * dst_stride_y, src_y + i * width, width);
+
+    switch (src_frame->format()) {
+      case VideoPixelFormat::I420: {
+        // I420: 3 プレーン (Y, U, V)
+        int width = static_cast<int>(src_frame->width());
+        int height = static_cast<int>(src_frame->height());
+        int chroma_width = (width + 1) / 2;
+        int chroma_height = (height + 1) / 2;
+
+        // Y plane
+        uint8_t* dst_y = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(pb, 0);
+        size_t dst_stride_y = CVPixelBufferGetBytesPerRowOfPlane(pb, 0);
+        const uint8_t* src_y = src_frame->plane_ptr(0);
+        for (int i = 0; i < height; ++i) {
+          memcpy(dst_y + i * dst_stride_y, src_y + i * width, width);
+        }
+
+        // U plane
+        uint8_t* dst_u = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(pb, 1);
+        size_t dst_stride_u = CVPixelBufferGetBytesPerRowOfPlane(pb, 1);
+        const uint8_t* src_u = src_frame->plane_ptr(1);
+        for (int i = 0; i < chroma_height; ++i) {
+          memcpy(dst_u + i * dst_stride_u, src_u + i * chroma_width,
+                 chroma_width);
+        }
+
+        // V plane
+        uint8_t* dst_v = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(pb, 2);
+        size_t dst_stride_v = CVPixelBufferGetBytesPerRowOfPlane(pb, 2);
+        const uint8_t* src_v = src_frame->plane_ptr(2);
+        for (int i = 0; i < chroma_height; ++i) {
+          memcpy(dst_v + i * dst_stride_v, src_v + i * chroma_width,
+                 chroma_width);
+        }
+        break;
       }
-    }
-    // UV plane (interleaved)
-    int chroma_row_bytes = ((width + 1) / 2) * 2;
-    if (dst_stride_uv == static_cast<size_t>(chroma_row_bytes)) {
-      memcpy(dst_uv, src_uv,
-             static_cast<size_t>(chroma_row_bytes * chroma_height));
-    } else {
-      for (int i = 0; i < chroma_height; ++i) {
-        memcpy(dst_uv + i * dst_stride_uv, src_uv + i * chroma_row_bytes,
-               chroma_row_bytes);
+
+      case VideoPixelFormat::BGRA: {
+        // BGRA: 単一プレーン
+        uint8_t* dst = (uint8_t*)CVPixelBufferGetBaseAddress(pb);
+        size_t dst_stride = CVPixelBufferGetBytesPerRow(pb);
+        const uint8_t* src = src_frame->plane_ptr(0);
+        int width = static_cast<int>(src_frame->width());
+        int height = static_cast<int>(src_frame->height());
+        size_t row_bytes = width * 4;
+
+        for (int i = 0; i < height; ++i) {
+          memcpy(dst + i * dst_stride, src + i * row_bytes, row_bytes);
+        }
+        break;
+      }
+
+      case VideoPixelFormat::NV12:
+      default: {
+        // NV12: 2 プレーン (Y, UV)
+        uint8_t* dst_y = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(pb, 0);
+        size_t dst_stride_y = CVPixelBufferGetBytesPerRowOfPlane(pb, 0);
+        uint8_t* dst_uv = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(pb, 1);
+        size_t dst_stride_uv = CVPixelBufferGetBytesPerRowOfPlane(pb, 1);
+
+        const uint8_t* src_y = src_frame->plane_ptr(0);
+        const uint8_t* src_uv = src_frame->plane_ptr(1);
+        int width = static_cast<int>(src_frame->width());
+        int height = static_cast<int>(src_frame->height());
+        int chroma_height = (height + 1) / 2;
+
+        // Y plane
+        for (int i = 0; i < height; ++i) {
+          memcpy(dst_y + i * dst_stride_y, src_y + i * width, width);
+        }
+        // UV plane (interleaved)
+        int chroma_row_bytes = ((width + 1) / 2) * 2;
+        for (int i = 0; i < chroma_height; ++i) {
+          memcpy(dst_uv + i * dst_stride_uv, src_uv + i * chroma_row_bytes,
+                 chroma_row_bytes);
+        }
+        break;
       }
     }
+
     CVPixelBufferUnlockBaseAddress(pb, 0);
   }
 
@@ -533,7 +611,8 @@ void VideoEncoder::encode_frame_videotoolbox(
           "Failed to create scaled CVPixelBuffer from pool");
     }
 
-    // VTPixelTransferSessionTransferImage でスケーリング
+    // VTPixelTransferSessionTransferImage でスケーリングとフォーマット変換を実行
+    // 入力は I420/NV12/BGRA のいずれか、出力は NV12
     auto transfer_session =
         (VTPixelTransferSessionRef)vt_pixel_transfer_session_;
     OSStatus transfer_err =
diff --git a/src/bindings/video_encoder_intel_vpl.cpp b/src/bindings/video_encoder_intel_vpl.cpp
index 56b8bfd..22a2c61 100644
--- a/src/bindings/video_encoder_intel_vpl.cpp
+++ b/src/bindings/video_encoder_intel_vpl.cpp
@@ -11,12 +11,11 @@
 #include <stdexcept>
 #include <vector>
 
-#include <libyuv.h>
-
 #include "../dyn/vpl.h"
 #include "encoded_video_chunk.h"
 #include "intel_vpl_helpers.h"
 #include "video_frame.h"
+#include "video_scaler.h"
 
 namespace nb = nanobind;
 
@@ -312,77 +311,14 @@ void VideoEncoder::encode_frame_intel_vpl(const VideoFrame& frame,
 
   mfxSession session = static_cast<mfxSession>(vpl_session_);
 
-  // NV12 フォーマットに変換
-  std::unique_ptr<VideoFrame> nv12;
-  if (frame.format() != VideoPixelFormat::NV12) {
-    nv12 = frame.convert_format(VideoPixelFormat::NV12);
-  }
-  const VideoFrame& src = nv12 ? *nv12 : frame;
-
-  // スケーリングが必要かどうかを判定
-  bool needs_scaling =
-      (src.width() != config_.width || src.height() != config_.height);
-
-  // スケーリング用のバッファ (I420 経由でスケーリング)
-  std::vector<uint8_t> scaled_i420_buffer;
-  std::vector<uint8_t> scaled_nv12_buffer;
-  const uint8_t* final_y = src.plane_ptr(0);
-  const uint8_t* final_uv = src.plane_ptr(1);
-  uint32_t final_width = src.width();
-  uint32_t final_height = src.height();
-
-  if (needs_scaling) {
-    // NV12 -> I420 に変換してからスケーリング
-    size_t src_i420_size =
-        src.width() * src.height() + (src.width() / 2) * (src.height() / 2) * 2;
-    std::vector<uint8_t> src_i420_buffer(src_i420_size);
-
-    uint8_t* src_i420_y = src_i420_buffer.data();
-    uint8_t* src_i420_u = src_i420_y + src.width() * src.height();
-    uint8_t* src_i420_v = src_i420_u + (src.width() / 2) * (src.height() / 2);
-
-    libyuv::NV12ToI420(src.plane_ptr(0), src.width(), src.plane_ptr(1),
-                       src.width(), src_i420_y, src.width(), src_i420_u,
-                       src.width() / 2, src_i420_v, src.width() / 2,
-                       src.width(), src.height());
-
-    // I420 でスケーリング
-    uint32_t dst_width = config_.width;
-    uint32_t dst_height = config_.height;
-    size_t dst_i420_size =
-        dst_width * dst_height + (dst_width / 2) * (dst_height / 2) * 2;
-    scaled_i420_buffer.resize(dst_i420_size);
-
-    uint8_t* dst_i420_y = scaled_i420_buffer.data();
-    uint8_t* dst_i420_u = dst_i420_y + dst_width * dst_height;
-    uint8_t* dst_i420_v = dst_i420_u + (dst_width / 2) * (dst_height / 2);
-
-    int result = libyuv::I420Scale(
-        src_i420_y, src.width(), src_i420_u, src.width() / 2, src_i420_v,
-        src.width() / 2, src.width(), src.height(), dst_i420_y, dst_width,
-        dst_i420_u, dst_width / 2, dst_i420_v, dst_width / 2, dst_width,
-        dst_height, libyuv::kFilterBox);
-
-    if (result != 0) {
-      throw std::runtime_error("libyuv::I420Scale failed");
-    }
-
-    // I420 -> NV12 に変換
-    size_t nv12_size = dst_width * dst_height * 3 / 2;
-    scaled_nv12_buffer.resize(nv12_size);
+  // スケーリングと NV12 変換
+  auto scaled =
+      video_scaler::scale_to_nv12(frame, config_.width, config_.height);
 
-    uint8_t* nv12_y = scaled_nv12_buffer.data();
-    uint8_t* nv12_uv = nv12_y + dst_width * dst_height;
-
-    libyuv::I420ToNV12(dst_i420_y, dst_width, dst_i420_u, dst_width / 2,
-                       dst_i420_v, dst_width / 2, nv12_y, dst_width, nv12_uv,
-                       dst_width, dst_width, dst_height);
-
-    final_y = nv12_y;
-    final_uv = nv12_uv;
-    final_width = dst_width;
-    final_height = dst_height;
-  }
+  const uint8_t* final_y = scaled.y;
+  const uint8_t* final_uv = scaled.uv;
+  uint32_t final_width = scaled.width;
+  uint32_t final_height = scaled.height;
 
   // サーフェスプールから未使用のサーフェスを取得
   intel_vpl::SurfacePool* pool =
diff --git a/src/bindings/video_encoder_nvidia.cpp b/src/bindings/video_encoder_nvidia.cpp
index deeae2f..d865d51 100644
--- a/src/bindings/video_encoder_nvidia.cpp
+++ b/src/bindings/video_encoder_nvidia.cpp
@@ -13,12 +13,11 @@
 #include <stdexcept>
 #include <vector>
 
-#include <libyuv.h>
-
 #include "../dyn/cuda.h"
 #include "../dyn/nvenc.h"
 #include "encoded_video_chunk.h"
 #include "video_frame.h"
+#include "video_scaler.h"
 
 namespace nb = nanobind;
 
@@ -385,77 +384,14 @@ void VideoEncoder::encode_frame_nvenc(const VideoFrame& frame,
     throw std::runtime_error("NVENC encoder is not initialized");
   }
 
-  // NV12 フォーマットに変換
-  std::unique_ptr<VideoFrame> nv12;
-  if (frame.format() != VideoPixelFormat::NV12) {
-    nv12 = frame.convert_format(VideoPixelFormat::NV12);
-  }
-  const VideoFrame& src = nv12 ? *nv12 : frame;
-
-  // スケーリングが必要かどうかを判定
-  bool needs_scaling =
-      (src.width() != config_.width || src.height() != config_.height);
-
-  // スケーリング用のバッファ (I420 経由でスケーリング)
-  std::vector<uint8_t> scaled_i420_buffer;
-  std::vector<uint8_t> scaled_nv12_buffer;
-  const uint8_t* final_y = src.plane_ptr(0);
-  const uint8_t* final_uv = src.plane_ptr(1);
-  uint32_t final_width = src.width();
-  uint32_t final_height = src.height();
-
-  if (needs_scaling) {
-    // NV12 -> I420 に変換してからスケーリング
-    size_t src_i420_size =
-        src.width() * src.height() + (src.width() / 2) * (src.height() / 2) * 2;
-    std::vector<uint8_t> src_i420_buffer(src_i420_size);
-
-    uint8_t* src_i420_y = src_i420_buffer.data();
-    uint8_t* src_i420_u = src_i420_y + src.width() * src.height();
-    uint8_t* src_i420_v = src_i420_u + (src.width() / 2) * (src.height() / 2);
-
-    libyuv::NV12ToI420(src.plane_ptr(0), src.width(), src.plane_ptr(1),
-                       src.width(), src_i420_y, src.width(), src_i420_u,
-                       src.width() / 2, src_i420_v, src.width() / 2,
-                       src.width(), src.height());
-
-    // I420 でスケーリング
-    uint32_t dst_width = config_.width;
-    uint32_t dst_height = config_.height;
-    size_t dst_i420_size =
-        dst_width * dst_height + (dst_width / 2) * (dst_height / 2) * 2;
-    scaled_i420_buffer.resize(dst_i420_size);
-
-    uint8_t* dst_i420_y = scaled_i420_buffer.data();
-    uint8_t* dst_i420_u = dst_i420_y + dst_width * dst_height;
-    uint8_t* dst_i420_v = dst_i420_u + (dst_width / 2) * (dst_height / 2);
-
-    int result = libyuv::I420Scale(
-        src_i420_y, src.width(), src_i420_u, src.width() / 2, src_i420_v,
-        src.width() / 2, src.width(), src.height(), dst_i420_y, dst_width,
-        dst_i420_u, dst_width / 2, dst_i420_v, dst_width / 2, dst_width,
-        dst_height, libyuv::kFilterBox);
-
-    if (result != 0) {
-      throw std::runtime_error("libyuv::I420Scale failed");
-    }
-
-    // I420 -> NV12 に変換
-    size_t nv12_size = dst_width * dst_height * 3 / 2;
-    scaled_nv12_buffer.resize(nv12_size);
+  // スケーリングと NV12 変換
+  auto scaled =
+      video_scaler::scale_to_nv12(frame, config_.width, config_.height);
 
-    uint8_t* nv12_y = scaled_nv12_buffer.data();
-    uint8_t* nv12_uv = nv12_y + dst_width * dst_height;
-
-    libyuv::I420ToNV12(dst_i420_y, dst_width, dst_i420_u, dst_width / 2,
-                       dst_i420_v, dst_width / 2, nv12_y, dst_width, nv12_uv,
-                       dst_width, dst_width, dst_height);
-
-    final_y = nv12_y;
-    final_uv = nv12_uv;
-    final_width = dst_width;
-    final_height = dst_height;
-  }
+  const uint8_t* final_y = scaled.y;
+  const uint8_t* final_uv = scaled.uv;
+  uint32_t final_width = scaled.width;
+  uint32_t final_height = scaled.height;
 
   // 入力バッファをロック
   NV_ENC_LOCK_INPUT_BUFFER lock_input_buffer = {};
diff --git a/src/bindings/video_encoder_vpx.cpp b/src/bindings/video_encoder_vpx.cpp
index bc27500..b8b6f7d 100644
--- a/src/bindings/video_encoder_vpx.cpp
+++ b/src/bindings/video_encoder_vpx.cpp
@@ -5,7 +5,7 @@
 #include <cstring>
 #include <thread>
 
-#include <libyuv.h>
+#include "video_scaler.h"
 
 // WebRTC の NumberOfThreads ロジックに準拠
 static int calculate_vpx_number_of_threads(int width,
@@ -284,53 +284,16 @@ void VideoEncoder::encode_frame_vpx(const VideoFrame& frame,
     vpx_codec_control(vpx_encoder_, VP9E_SET_SVC_LAYER_ID, &layer_id);
   }
 
-  // スケーリングが必要かどうかを判定
-  bool needs_scaling =
-      (frame.width() != config_.width || frame.height() != config_.height);
-
-  // スケーリング用のバッファ
-  std::vector<uint8_t> scaled_buffer;
-  const uint8_t* src_y = frame.plane_ptr(0);
-  const uint8_t* src_u = frame.plane_ptr(1);
-  const uint8_t* src_v = frame.plane_ptr(2);
-  int src_stride_y = static_cast<int>(frame.width());
-  int src_stride_u = static_cast<int>(frame.width() / 2);
-  int src_stride_v = static_cast<int>(frame.width() / 2);
-
-  // スケーリングが必要な場合は libyuv で変換
-  if (needs_scaling) {
-    uint32_t dst_width = config_.width;
-    uint32_t dst_height = config_.height;
-    size_t y_size = dst_width * dst_height;
-    size_t uv_size = (dst_width / 2) * (dst_height / 2);
-    scaled_buffer.resize(y_size + uv_size * 2);
-
-    uint8_t* dst_y = scaled_buffer.data();
-    uint8_t* dst_u = dst_y + y_size;
-    uint8_t* dst_v = dst_u + uv_size;
-    int dst_stride_y = static_cast<int>(dst_width);
-    int dst_stride_u = static_cast<int>(dst_width / 2);
-    int dst_stride_v = static_cast<int>(dst_width / 2);
-
-    int result = libyuv::I420Scale(
-        src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v,
-        static_cast<int>(frame.width()), static_cast<int>(frame.height()),
-        dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v,
-        static_cast<int>(dst_width), static_cast<int>(dst_height),
-        libyuv::kFilterBox);
-
-    if (result != 0) {
-      throw std::runtime_error("libyuv::I420Scale failed");
-    }
+  // スケーリングと I420 変換
+  auto scaled =
+      video_scaler::scale_to_i420(frame, config_.width, config_.height);
 
-    // スケーリング後のポインタとストライドを更新
-    src_y = dst_y;
-    src_u = dst_u;
-    src_v = dst_v;
-    src_stride_y = dst_stride_y;
-    src_stride_u = dst_stride_u;
-    src_stride_v = dst_stride_v;
-  }
+  const uint8_t* src_y = scaled.y;
+  const uint8_t* src_u = scaled.u;
+  const uint8_t* src_v = scaled.v;
+  int src_stride_y = scaled.stride_y;
+  int src_stride_u = scaled.stride_u;
+  int src_stride_v = scaled.stride_v;
 
   // I420 イメージをラップ
   vpx_image_t img;
diff --git a/src/bindings/video_scaler.cpp b/src/bindings/video_scaler.cpp
new file mode 100644
index 0000000..3004046
--- /dev/null
+++ b/src/bindings/video_scaler.cpp
@@ -0,0 +1,776 @@
+// Implementation of the scaling helper functions
+
+#include "video_scaler.h"
+
+#include <libyuv.h>
+#include <stdexcept>
+
+namespace video_scaler {
+
+namespace {
+
+// Scales I420 planes via libyuv::I420Scale (box filter); returns its result.
+int scale_i420(const uint8_t* src_y,
+               int src_stride_y,
+               const uint8_t* src_u,
+               int src_stride_u,
+               const uint8_t* src_v,
+               int src_stride_v,
+               int src_width,
+               int src_height,
+               uint8_t* dst_y,
+               int dst_stride_y,
+               uint8_t* dst_u,
+               int dst_stride_u,
+               uint8_t* dst_v,
+               int dst_stride_v,
+               int dst_width,
+               int dst_height) {
+  return libyuv::I420Scale(
+      src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_width,
+      src_height, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v,
+      dst_width, dst_height, libyuv::kFilterBox);
+}
+
+// Scales I422 planes via libyuv::I422Scale (box filter); returns its result.
+int scale_i422(const uint8_t* src_y,
+               int src_stride_y,
+               const uint8_t* src_u,
+               int src_stride_u,
+               const uint8_t* src_v,
+               int src_stride_v,
+               int src_width,
+               int src_height,
+               uint8_t* dst_y,
+               int dst_stride_y,
+               uint8_t* dst_u,
+               int dst_stride_u,
+               uint8_t* dst_v,
+               int dst_stride_v,
+               int dst_width,
+               int dst_height) {
+  return libyuv::I422Scale(
+      src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_width,
+      src_height, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v,
+      dst_width, dst_height, libyuv::kFilterBox);
+}
+
+// Scales I444 planes via libyuv::I444Scale (box filter); returns its result.
+int scale_i444(const uint8_t* src_y,
+               int src_stride_y,
+               const uint8_t* src_u,
+               int src_stride_u,
+               const uint8_t* src_v,
+               int src_stride_v,
+               int src_width,
+               int src_height,
+               uint8_t* dst_y,
+               int dst_stride_y,
+               uint8_t* dst_u,
+               int dst_stride_u,
+               uint8_t* dst_v,
+               int dst_stride_v,
+               int dst_width,
+               int dst_height) {
+  return libyuv::I444Scale(
+      src_y, src_stride_y, src_u, src_stride_u, src_v, src_stride_v, src_width,
+      src_height, dst_y, dst_stride_y, dst_u, dst_stride_u, dst_v, dst_stride_v,
+      dst_width, dst_height, libyuv::kFilterBox);
+}
+
+// Scales NV12 (Y + UV planes) via libyuv::NV12Scale (box filter); returns its result.
+int scale_nv12(const uint8_t* src_y,
+               int src_stride_y,
+               const uint8_t* src_uv,
+               int src_stride_uv,
+               int src_width,
+               int src_height,
+               uint8_t* dst_y,
+               int dst_stride_y,
+               uint8_t* dst_uv,
+               int dst_stride_uv,
+               int dst_width,
+               int dst_height) {
+  return libyuv::NV12Scale(src_y, src_stride_y, src_uv, src_stride_uv,
+                           src_width, src_height, dst_y, dst_stride_y, dst_uv,
+                           dst_stride_uv, dst_width, dst_height,
+                           libyuv::kFilterBox);
+}
+
+// Scales a packed ARGB buffer via libyuv::ARGBScale (box filter); returns its result.
+int scale_argb(const uint8_t* src_argb,
+               int src_stride_argb,
+               int src_width,
+               int src_height,
+               uint8_t* dst_argb,
+               int dst_stride_argb,
+               int dst_width,
+               int dst_height) {
+  return libyuv::ARGBScale(src_argb, src_stride_argb, src_width, src_height,
+                           dst_argb, dst_stride_argb, dst_width, dst_height,
+                           libyuv::kFilterBox);
+}
+
+}  // namespace
+
+I420ScaleResult scale_to_i420(const VideoFrame& frame,
+                              uint32_t dst_width,
+                              uint32_t dst_height) {
+  I420ScaleResult result;  // Output: `frame` as I420 at dst_width x dst_height
+  result.width = dst_width;
+  result.height = dst_height;
+
+  bool needs_scaling =
+      (frame.width() != dst_width || frame.height() != dst_height);
+
+  size_t y_size = dst_width * dst_height;
+  size_t uv_size = (dst_width / 2) * (dst_height / 2);  // NOTE(review): rounds down; assumes even dimensions - confirm
+
+  // No scaling needed and already I420: return pointers into `frame` itself (result.buffer is left untouched)
+  if (!needs_scaling && frame.format() == VideoPixelFormat::I420) {
+    result.y = frame.plane_ptr(0);
+    result.u = frame.plane_ptr(1);
+    result.v = frame.plane_ptr(2);
+    result.stride_y = static_cast<int>(dst_width);
+    result.stride_u = static_cast<int>(dst_width / 2);
+    result.stride_v = static_cast<int>(dst_width / 2);
+    return result;
+  }
+
+  // Buffer that receives the scaled output
+  std::vector<uint8_t> scaled_buffer;
+  uint32_t current_width = frame.width();
+  uint32_t current_height = frame.height();
+  VideoPixelFormat current_format = frame.format();
+
+  // 1. Scale (still in the input format, except RGB/BGR which become I420)
+  if (needs_scaling) {
+    int scale_result = 0;
+
+    switch (frame.format()) {
+      case VideoPixelFormat::I420: {
+        size_t dst_y_size = dst_width * dst_height;
+        size_t dst_uv_size = (dst_width / 2) * (dst_height / 2);
+        scaled_buffer.resize(dst_y_size + dst_uv_size * 2);
+        scale_result = scale_i420(
+            frame.plane_ptr(0), static_cast<int>(frame.width()),
+            frame.plane_ptr(1), static_cast<int>(frame.width() / 2),
+            frame.plane_ptr(2), static_cast<int>(frame.width() / 2),
+            static_cast<int>(frame.width()), static_cast<int>(frame.height()),
+            scaled_buffer.data(), static_cast<int>(dst_width),
+            scaled_buffer.data() + dst_y_size, static_cast<int>(dst_width / 2),
+            scaled_buffer.data() + dst_y_size + dst_uv_size,
+            static_cast<int>(dst_width / 2), static_cast<int>(dst_width),
+            static_cast<int>(dst_height));
+        break;
+      }
+      case VideoPixelFormat::I422: {
+        size_t dst_y_size = dst_width * dst_height;
+        size_t dst_uv_size = (dst_width / 2) * dst_height;
+        scaled_buffer.resize(dst_y_size + dst_uv_size * 2);
+        scale_result = scale_i422(
+            frame.plane_ptr(0), static_cast<int>(frame.width()),
+            frame.plane_ptr(1), static_cast<int>(frame.width() / 2),
+            frame.plane_ptr(2), static_cast<int>(frame.width() / 2),
+            static_cast<int>(frame.width()), static_cast<int>(frame.height()),
+            scaled_buffer.data(), static_cast<int>(dst_width),
+            scaled_buffer.data() + dst_y_size, static_cast<int>(dst_width / 2),
+            scaled_buffer.data() + dst_y_size + dst_uv_size,
+            static_cast<int>(dst_width / 2), static_cast<int>(dst_width),
+            static_cast<int>(dst_height));
+        break;
+      }
+      case VideoPixelFormat::I444: {
+        size_t plane_size = dst_width * dst_height;
+        scaled_buffer.resize(plane_size * 3);
+        scale_result = scale_i444(
+            frame.plane_ptr(0), static_cast<int>(frame.width()),
+            frame.plane_ptr(1), static_cast<int>(frame.width()),
+            frame.plane_ptr(2), static_cast<int>(frame.width()),
+            static_cast<int>(frame.width()), static_cast<int>(frame.height()),
+            scaled_buffer.data(), static_cast<int>(dst_width),
+            scaled_buffer.data() + plane_size, static_cast<int>(dst_width),
+            scaled_buffer.data() + plane_size * 2, static_cast<int>(dst_width),
+            static_cast<int>(dst_width), static_cast<int>(dst_height));
+        break;
+      }
+      case VideoPixelFormat::NV12: {
+        size_t nv12_size = dst_width * dst_height * 3 / 2;
+        scaled_buffer.resize(nv12_size);
+        scale_result = scale_nv12(
+            frame.plane_ptr(0), static_cast<int>(frame.width()),
+            frame.plane_ptr(1), static_cast<int>(frame.width()),
+            static_cast<int>(frame.width()), static_cast<int>(frame.height()),
+            scaled_buffer.data(), static_cast<int>(dst_width),
+            scaled_buffer.data() + dst_width * dst_height,
+            static_cast<int>(dst_width), static_cast<int>(dst_width),
+            static_cast<int>(dst_height));
+        break;
+      }
+      case VideoPixelFormat::RGBA:
+      case VideoPixelFormat::BGRA: {
+        size_t argb_size = dst_width * dst_height * 4;
+        scaled_buffer.resize(argb_size);
+        scale_result = scale_argb(
+            frame.plane_ptr(0), static_cast<int>(frame.width() * 4),
+            static_cast<int>(frame.width()), static_cast<int>(frame.height()),
+            scaled_buffer.data(), static_cast<int>(dst_width * 4),
+            static_cast<int>(dst_width), static_cast<int>(dst_height));
+        break;
+      }
+      case VideoPixelFormat::RGB:
+      case VideoPixelFormat::BGR: {
+        // RGB/BGR cannot be scaled directly, so convert to I420 first and scale that
+        size_t src_y_size = frame.width() * frame.height();
+        size_t src_uv_size = (frame.width() / 2) * (frame.height() / 2);
+        std::vector<uint8_t> src_i420(src_y_size + src_uv_size * 2);
+        uint8_t* src_i420_y = src_i420.data();
+        uint8_t* src_i420_u = src_i420_y + src_y_size;
+        uint8_t* src_i420_v = src_i420_u + src_uv_size;
+
+        if (frame.format() == VideoPixelFormat::RGB) {  // NOTE(review): libyuv RGB24 is B,G,R in memory (RAW is R,G,B); confirm this mapping
+          libyuv::RGB24ToI420(frame.plane_ptr(0),
+                              static_cast<int>(frame.width() * 3), src_i420_y,
+                              static_cast<int>(frame.width()), src_i420_u,
+                              static_cast<int>(frame.width() / 2), src_i420_v,
+                              static_cast<int>(frame.width() / 2),
+                              static_cast<int>(frame.width()),
+                              static_cast<int>(frame.height()));
+        } else {
+          libyuv::RAWToI420(frame.plane_ptr(0),
+                            static_cast<int>(frame.width() * 3), src_i420_y,
+                            static_cast<int>(frame.width()), src_i420_u,
+                            static_cast<int>(frame.width() / 2), src_i420_v,
+                            static_cast<int>(frame.width() / 2),
+                            static_cast<int>(frame.width()),
+                            static_cast<int>(frame.height()));
+        }
+
+        // Scale the intermediate I420 data
+        size_t dst_y_size = dst_width * dst_height;
+        size_t dst_uv_size = (dst_width / 2) * (dst_height / 2);
+        scaled_buffer.resize(dst_y_size + dst_uv_size * 2);
+
+        scale_result = scale_i420(
+            src_i420_y, static_cast<int>(frame.width()), src_i420_u,
+            static_cast<int>(frame.width() / 2), src_i420_v,
+            static_cast<int>(frame.width() / 2),
+            static_cast<int>(frame.width()), static_cast<int>(frame.height()),
+            scaled_buffer.data(), static_cast<int>(dst_width),
+            scaled_buffer.data() + dst_y_size, static_cast<int>(dst_width / 2),
+            scaled_buffer.data() + dst_y_size + dst_uv_size,
+            static_cast<int>(dst_width / 2), static_cast<int>(dst_width),
+            static_cast<int>(dst_height));
+
+        // The scaled data is now I420, so step 2 needs no further conversion
+        current_format = VideoPixelFormat::I420;
+        break;
+      }
+    }
+
+    if (scale_result != 0) {
+      throw std::runtime_error("libyuv scale failed");
+    }
+
+    current_width = dst_width;
+    current_height = dst_height;
+  }
+
+  // 2. Convert to I420 (the output buffer is sized up front, even though the scaled-I420 path replaces it)
+  result.buffer.resize(y_size + uv_size * 2);
+  uint8_t* dst_y = result.buffer.data();
+  uint8_t* dst_u = dst_y + y_size;
+  uint8_t* dst_v = dst_u + uv_size;
+
+  if (current_format == VideoPixelFormat::I420) {
+    if (needs_scaling) {
+      // Already scaled I420: take over the scaled buffer (moved, not copied)
+      result.buffer = std::move(scaled_buffer);
+      result.y = result.buffer.data();
+      result.u = result.buffer.data() + y_size;
+      result.v = result.buffer.data() + y_size + uv_size;
+    } else {
+      // No scaling: point at the source planes. NOTE(review): looks unreachable, the early return above covers unscaled I420
+      result.buffer.clear();
+      result.y = frame.plane_ptr(0);
+      result.u = frame.plane_ptr(1);
+      result.v = frame.plane_ptr(2);
+    }
+  } else {
+    // Any format other than I420 has to be converted; read from the scaled buffer if scaling happened
+    const uint8_t* src_data =
+        needs_scaling ? scaled_buffer.data() : frame.plane_ptr(0);
+
+    switch (current_format) {
+      case VideoPixelFormat::I422: {
+        size_t src_uv_size = (current_width / 2) * current_height;
+        const uint8_t* src_u_ptr =
+            needs_scaling
+                ? scaled_buffer.data() + current_width * current_height
+                : frame.plane_ptr(1);
+        const uint8_t* src_v_ptr =
+            needs_scaling ? scaled_buffer.data() +
+                                current_width * current_height + src_uv_size
+                          : frame.plane_ptr(2);
+        libyuv::I422ToI420(src_data, static_cast<int>(current_width), src_u_ptr,
+                           static_cast<int>(current_width / 2), src_v_ptr,
+                           static_cast<int>(current_width / 2), dst_y,
+                           static_cast<int>(current_width), dst_u,
+                           static_cast<int>(current_width / 2), dst_v,
+                           static_cast<int>(current_width / 2),
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_height));
+        break;
+      }
+      case VideoPixelFormat::I444: {
+        size_t plane_size = current_width * current_height;
+        const uint8_t* src_u_ptr = needs_scaling
+                                       ? scaled_buffer.data() + plane_size
+                                       : frame.plane_ptr(1);
+        const uint8_t* src_v_ptr = needs_scaling
+                                       ? scaled_buffer.data() + plane_size * 2
+                                       : frame.plane_ptr(2);
+        libyuv::I444ToI420(src_data, static_cast<int>(current_width), src_u_ptr,
+                           static_cast<int>(current_width), src_v_ptr,
+                           static_cast<int>(current_width), dst_y,
+                           static_cast<int>(current_width), dst_u,
+                           static_cast<int>(current_width / 2), dst_v,
+                           static_cast<int>(current_width / 2),
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_height));
+        break;
+      }
+      case VideoPixelFormat::NV12: {
+        const uint8_t* src_uv =
+            needs_scaling
+                ? scaled_buffer.data() + current_width * current_height
+                : frame.plane_ptr(1);
+        libyuv::NV12ToI420(src_data, static_cast<int>(current_width), src_uv,
+                           static_cast<int>(current_width), dst_y,
+                           static_cast<int>(current_width), dst_u,
+                           static_cast<int>(current_width / 2), dst_v,
+                           static_cast<int>(current_width / 2),
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_height));
+        break;
+      }
+      case VideoPixelFormat::RGBA: {  // libyuv "ABGR" is R,G,B,A in memory
+        libyuv::ABGRToI420(src_data, static_cast<int>(current_width * 4), dst_y,
+                           static_cast<int>(current_width), dst_u,
+                           static_cast<int>(current_width / 2), dst_v,
+                           static_cast<int>(current_width / 2),
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_height));
+        break;
+      }
+      case VideoPixelFormat::BGRA: {  // libyuv "ARGB" is B,G,R,A in memory
+        libyuv::ARGBToI420(src_data, static_cast<int>(current_width * 4), dst_y,
+                           static_cast<int>(current_width), dst_u,
+                           static_cast<int>(current_width / 2), dst_v,
+                           static_cast<int>(current_width / 2),
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_height));
+        break;
+      }
+      case VideoPixelFormat::RGB: {  // NOTE(review): same RGB24-vs-RAW byte-order question as in step 1
+        libyuv::RGB24ToI420(src_data, static_cast<int>(current_width * 3),
+                            dst_y, static_cast<int>(current_width), dst_u,
+                            static_cast<int>(current_width / 2), dst_v,
+                            static_cast<int>(current_width / 2),
+                            static_cast<int>(current_width),
+                            static_cast<int>(current_height));
+        break;
+      }
+      case VideoPixelFormat::BGR: {
+        libyuv::RAWToI420(src_data, static_cast<int>(current_width * 3), dst_y,
+                          static_cast<int>(current_width), dst_u,
+                          static_cast<int>(current_width / 2), dst_v,
+                          static_cast<int>(current_width / 2),
+                          static_cast<int>(current_width),
+                          static_cast<int>(current_height));
+        break;
+      }
+      default:
+        throw std::runtime_error(
+            "Unsupported pixel format for I420 conversion");
+    }
+
+    result.y = dst_y;
+    result.u = dst_u;
+    result.v = dst_v;
+  }
+
+  result.stride_y = static_cast<int>(dst_width);
+  result.stride_u = static_cast<int>(dst_width / 2);
+  result.stride_v = static_cast<int>(dst_width / 2);
+
+  return result;
+}
+
+NV12ScaleResult scale_to_nv12(const VideoFrame& frame,
+                              uint32_t dst_width,
+                              uint32_t dst_height) {
+  NV12ScaleResult result;
+  result.width = dst_width;
+  result.height = dst_height;
+
+  bool needs_scaling =
+      (frame.width() != dst_width || frame.height() != dst_height);
+
+  size_t nv12_size = dst_width * dst_height * 3 / 2;
+
+  // スケーリングが不要かつ NV12 の場合は元フレームのポインタを返す
+  if (!needs_scaling && frame.format() == VideoPixelFormat::NV12) {
+    result.y = frame.plane_ptr(0);
+    result.uv = frame.plane_ptr(1);
+    return result;
+  }
+
+  // スケーリング用のバッファ
+  std::vector<uint8_t> scaled_buffer;
+  uint32_t current_width = frame.width();
+  uint32_t current_height = frame.height();
+  VideoPixelFormat current_format = frame.format();
+
+  // 1. スケーリング (入力フォーマットのまま)
+  if (needs_scaling) {
+    int scale_result = 0;
+
+    switch (frame.format()) {
+      case VideoPixelFormat::I420: {
+        size_t dst_y_size = dst_width * dst_height;
+        size_t dst_uv_size = (dst_width / 2) * (dst_height / 2);
+        scaled_buffer.resize(dst_y_size + dst_uv_size * 2);
+        scale_result = scale_i420(
+            frame.plane_ptr(0), static_cast<int>(frame.width()),
+            frame.plane_ptr(1), static_cast<int>(frame.width() / 2),
+            frame.plane_ptr(2), static_cast<int>(frame.width() / 2),
+            static_cast<int>(frame.width()), static_cast<int>(frame.height()),
+            scaled_buffer.data(), static_cast<int>(dst_width),
+            scaled_buffer.data() + dst_y_size, static_cast<int>(dst_width / 2),
+            scaled_buffer.data() + dst_y_size + dst_uv_size,
+            static_cast<int>(dst_width / 2), static_cast<int>(dst_width),
+            static_cast<int>(dst_height));
+        break;
+      }
+      case VideoPixelFormat::I422: {
+        size_t dst_y_size = dst_width * dst_height;
+        size_t dst_uv_size = (dst_width / 2) * dst_height;
+        scaled_buffer.resize(dst_y_size + dst_uv_size * 2);
+        scale_result = scale_i422(
+            frame.plane_ptr(0), static_cast<int>(frame.width()),
+            frame.plane_ptr(1), static_cast<int>(frame.width() / 2),
+            frame.plane_ptr(2), static_cast<int>(frame.width() / 2),
+            static_cast<int>(frame.width()), static_cast<int>(frame.height()),
+            scaled_buffer.data(), static_cast<int>(dst_width),
+            scaled_buffer.data() + dst_y_size, static_cast<int>(dst_width / 2),
+            scaled_buffer.data() + dst_y_size + dst_uv_size,
+            static_cast<int>(dst_width / 2), static_cast<int>(dst_width),
+            static_cast<int>(dst_height));
+        break;
+      }
+      case VideoPixelFormat::I444: {
+        size_t plane_size = dst_width * dst_height;
+        scaled_buffer.resize(plane_size * 3);
+        scale_result = scale_i444(
+            frame.plane_ptr(0), static_cast<int>(frame.width()),
+            frame.plane_ptr(1), static_cast<int>(frame.width()),
+            frame.plane_ptr(2), static_cast<int>(frame.width()),
+            static_cast<int>(frame.width()), static_cast<int>(frame.height()),
+            scaled_buffer.data(), static_cast<int>(dst_width),
+            scaled_buffer.data() + plane_size, static_cast<int>(dst_width),
+            scaled_buffer.data() + plane_size * 2, static_cast<int>(dst_width),
+            static_cast<int>(dst_width), static_cast<int>(dst_height));
+        break;
+      }
+      case VideoPixelFormat::NV12: {
+        scaled_buffer.resize(nv12_size);
+        scale_result = scale_nv12(
+            frame.plane_ptr(0), static_cast<int>(frame.width()),
+            frame.plane_ptr(1), static_cast<int>(frame.width()),
+            static_cast<int>(frame.width()), static_cast<int>(frame.height()),
+            scaled_buffer.data(), static_cast<int>(dst_width),
+            scaled_buffer.data() + dst_width * dst_height,
+            static_cast<int>(dst_width), static_cast<int>(dst_width),
+            static_cast<int>(dst_height));
+        break;
+      }
+      case VideoPixelFormat::RGBA:
+      case VideoPixelFormat::BGRA: {
+        size_t argb_size = dst_width * dst_height * 4;
+        scaled_buffer.resize(argb_size);
+        scale_result = scale_argb(
+            frame.plane_ptr(0), static_cast<int>(frame.width() * 4),
+            static_cast<int>(frame.width()), static_cast<int>(frame.height()),
+            scaled_buffer.data(), static_cast<int>(dst_width * 4),
+            static_cast<int>(dst_width), static_cast<int>(dst_height));
+        break;
+      }
+      case VideoPixelFormat::RGB:
+      case VideoPixelFormat::BGR: {
+        // RGB/BGR は直接スケーリングできないため、NV12 に変換してからスケーリング
+        size_t src_i420_size = frame.width() * frame.height() * 3 / 2;
+        std::vector<uint8_t> src_i420(src_i420_size);
+        uint8_t* src_i420_y = src_i420.data();
+        uint8_t* src_i420_u = src_i420_y + frame.width() * frame.height();
+        uint8_t* src_i420_v =
+            src_i420_u + (frame.width() / 2) * (frame.height() / 2);
+
+        if (frame.format() == VideoPixelFormat::RGB) {
+          libyuv::RGB24ToI420(frame.plane_ptr(0),
+                              static_cast<int>(frame.width() * 3), src_i420_y,
+                              static_cast<int>(frame.width()), src_i420_u,
+                              static_cast<int>(frame.width() / 2), src_i420_v,
+                              static_cast<int>(frame.width() / 2),
+                              static_cast<int>(frame.width()),
+                              static_cast<int>(frame.height()));
+        } else {
+          libyuv::RAWToI420(frame.plane_ptr(0),
+                            static_cast<int>(frame.width() * 3), src_i420_y,
+                            static_cast<int>(frame.width()), src_i420_u,
+                            static_cast<int>(frame.width() / 2), src_i420_v,
+                            static_cast<int>(frame.width() / 2),
+                            static_cast<int>(frame.width()),
+                            static_cast<int>(frame.height()));
+        }
+
+        // I420 -> NV12
+        size_t src_nv12_size = frame.width() * frame.height() * 3 / 2;
+        std::vector<uint8_t> src_nv12(src_nv12_size);
+        uint8_t* src_nv12_y = src_nv12.data();
+        uint8_t* src_nv12_uv = src_nv12_y + frame.width() * frame.height();
+
+        libyuv::I420ToNV12(src_i420_y, static_cast<int>(frame.width()),
+                           src_i420_u, static_cast<int>(frame.width() / 2),
+                           src_i420_v, static_cast<int>(frame.width() / 2),
+                           src_nv12_y, static_cast<int>(frame.width()),
+                           src_nv12_uv, static_cast<int>(frame.width()),
+                           static_cast<int>(frame.width()),
+                           static_cast<int>(frame.height()));
+
+        // NV12 でスケーリング
+        scaled_buffer.resize(nv12_size);
+        scale_result = scale_nv12(
+            src_nv12_y, static_cast<int>(frame.width()), src_nv12_uv,
+            static_cast<int>(frame.width()), static_cast<int>(frame.width()),
+            static_cast<int>(frame.height()), scaled_buffer.data(),
+            static_cast<int>(dst_width),
+            scaled_buffer.data() + dst_width * dst_height,
+            static_cast<int>(dst_width), static_cast<int>(dst_width),
+            static_cast<int>(dst_height));
+
+        // スケーリング後は NV12
+        current_format = VideoPixelFormat::NV12;
+        break;
+      }
+    }
+
+    if (scale_result != 0) {
+      throw std::runtime_error("libyuv scale failed");
+    }
+
+    current_width = dst_width;
+    current_height = dst_height;
+  }
+
+  // 2. NV12 に変換
+  if (current_format == VideoPixelFormat::NV12) {
+    if (needs_scaling) {
+      // スケーリング済みの場合はバッファをそのまま使用
+      result.buffer = std::move(scaled_buffer);
+      result.y = result.buffer.data();
+      result.uv = result.buffer.data() + current_width * current_height;
+    } else {
+      // スケーリング不要の場合は元フレームのポインタを返す
+      result.y = frame.plane_ptr(0);
+      result.uv = frame.plane_ptr(1);
+    }
+  } else {
+    // NV12 以外のフォーマットは変換が必要
+    result.buffer.resize(nv12_size);
+    uint8_t* dst_y = result.buffer.data();
+    uint8_t* dst_uv = dst_y + current_width * current_height;
+
+    const uint8_t* src_data =
+        needs_scaling ? scaled_buffer.data() : frame.plane_ptr(0);
+
+    switch (current_format) {
+      case VideoPixelFormat::I420: {
+        size_t y_size = current_width * current_height;
+        size_t uv_size = (current_width / 2) * (current_height / 2);
+        const uint8_t* src_u =
+            needs_scaling ? scaled_buffer.data() + y_size : frame.plane_ptr(1);
+        const uint8_t* src_v = needs_scaling
+                                   ? scaled_buffer.data() + y_size + uv_size
+                                   : frame.plane_ptr(2);
+        libyuv::I420ToNV12(src_data, static_cast<int>(current_width), src_u,
+                           static_cast<int>(current_width / 2), src_v,
+                           static_cast<int>(current_width / 2), dst_y,
+                           static_cast<int>(current_width), dst_uv,
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_height));
+        break;
+      }
+      case VideoPixelFormat::I422: {
+        size_t y_size = current_width * current_height;
+        size_t uv_size = (current_width / 2) * current_height;
+        const uint8_t* src_u =
+            needs_scaling ? scaled_buffer.data() + y_size : frame.plane_ptr(1);
+        const uint8_t* src_v = needs_scaling
+                                   ? scaled_buffer.data() + y_size + uv_size
+                                   : frame.plane_ptr(2);
+        // I422 -> I420 -> NV12
+        size_t i420_size = current_width * current_height * 3 / 2;
+        std::vector<uint8_t> i420_tmp(i420_size);
+        uint8_t* i420_y = i420_tmp.data();
+        uint8_t* i420_u = i420_y + current_width * current_height;
+        uint8_t* i420_v = i420_u + (current_width / 2) * (current_height / 2);
+        libyuv::I422ToI420(src_data, static_cast<int>(current_width), src_u,
+                           static_cast<int>(current_width / 2), src_v,
+                           static_cast<int>(current_width / 2), i420_y,
+                           static_cast<int>(current_width), i420_u,
+                           static_cast<int>(current_width / 2), i420_v,
+                           static_cast<int>(current_width / 2),
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_height));
+        libyuv::I420ToNV12(i420_y, static_cast<int>(current_width), i420_u,
+                           static_cast<int>(current_width / 2), i420_v,
+                           static_cast<int>(current_width / 2), dst_y,
+                           static_cast<int>(current_width), dst_uv,
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_height));
+        break;
+      }
+      case VideoPixelFormat::I444: {
+        size_t plane_size = current_width * current_height;
+        const uint8_t* src_u = needs_scaling ? scaled_buffer.data() + plane_size
+                                             : frame.plane_ptr(1);
+        const uint8_t* src_v = needs_scaling
+                                   ? scaled_buffer.data() + plane_size * 2
+                                   : frame.plane_ptr(2);
+        // I444 -> I420 -> NV12
+        size_t i420_size = current_width * current_height * 3 / 2;
+        std::vector<uint8_t> i420_tmp(i420_size);
+        uint8_t* i420_y = i420_tmp.data();
+        uint8_t* i420_u = i420_y + current_width * current_height;
+        uint8_t* i420_v = i420_u + (current_width / 2) * (current_height / 2);
+        libyuv::I444ToI420(src_data, static_cast<int>(current_width), src_u,
+                           static_cast<int>(current_width), src_v,
+                           static_cast<int>(current_width), i420_y,
+                           static_cast<int>(current_width), i420_u,
+                           static_cast<int>(current_width / 2), i420_v,
+                           static_cast<int>(current_width / 2),
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_height));
+        libyuv::I420ToNV12(i420_y, static_cast<int>(current_width), i420_u,
+                           static_cast<int>(current_width / 2), i420_v,
+                           static_cast<int>(current_width / 2), dst_y,
+                           static_cast<int>(current_width), dst_uv,
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_height));
+        break;
+      }
+      case VideoPixelFormat::RGBA: {
+        // RGBA -> I420 -> NV12
+        size_t i420_size = current_width * current_height * 3 / 2;
+        std::vector<uint8_t> i420_tmp(i420_size);
+        uint8_t* i420_y = i420_tmp.data();
+        uint8_t* i420_u = i420_y + current_width * current_height;
+        uint8_t* i420_v = i420_u + (current_width / 2) * (current_height / 2);
+        libyuv::ABGRToI420(src_data, static_cast<int>(current_width * 4),
+                           i420_y, static_cast<int>(current_width), i420_u,
+                           static_cast<int>(current_width / 2), i420_v,
+                           static_cast<int>(current_width / 2),
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_height));
+        libyuv::I420ToNV12(i420_y, static_cast<int>(current_width), i420_u,
+                           static_cast<int>(current_width / 2), i420_v,
+                           static_cast<int>(current_width / 2), dst_y,
+                           static_cast<int>(current_width), dst_uv,
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_height));
+        break;
+      }
+      case VideoPixelFormat::BGRA: {
+        // BGRA -> I420 -> NV12
+        size_t i420_size = current_width * current_height * 3 / 2;
+        std::vector<uint8_t> i420_tmp(i420_size);
+        uint8_t* i420_y = i420_tmp.data();
+        uint8_t* i420_u = i420_y + current_width * current_height;
+        uint8_t* i420_v = i420_u + (current_width / 2) * (current_height / 2);
+        libyuv::ARGBToI420(src_data, static_cast<int>(current_width * 4),
+                           i420_y, static_cast<int>(current_width), i420_u,
+                           static_cast<int>(current_width / 2), i420_v,
+                           static_cast<int>(current_width / 2),
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_height));
+        libyuv::I420ToNV12(i420_y, static_cast<int>(current_width), i420_u,
+                           static_cast<int>(current_width / 2), i420_v,
+                           static_cast<int>(current_width / 2), dst_y,
+                           static_cast<int>(current_width), dst_uv,
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_height));
+        break;
+      }
+      case VideoPixelFormat::RGB: {
+        // RGB -> I420 -> NV12
+        size_t i420_size = current_width * current_height * 3 / 2;
+        std::vector<uint8_t> i420_tmp(i420_size);
+        uint8_t* i420_y = i420_tmp.data();
+        uint8_t* i420_u = i420_y + current_width * current_height;
+        uint8_t* i420_v = i420_u + (current_width / 2) * (current_height / 2);
+        libyuv::RGB24ToI420(src_data, static_cast<int>(current_width * 3),
+                            i420_y, static_cast<int>(current_width), i420_u,
+                            static_cast<int>(current_width / 2), i420_v,
+                            static_cast<int>(current_width / 2),
+                            static_cast<int>(current_width),
+                            static_cast<int>(current_height));
+        libyuv::I420ToNV12(i420_y, static_cast<int>(current_width), i420_u,
+                           static_cast<int>(current_width / 2), i420_v,
+                           static_cast<int>(current_width / 2), dst_y,
+                           static_cast<int>(current_width), dst_uv,
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_height));
+        break;
+      }
+      case VideoPixelFormat::BGR: {
+        // BGR -> I420 -> NV12
+        size_t i420_size = current_width * current_height * 3 / 2;
+        std::vector<uint8_t> i420_tmp(i420_size);
+        uint8_t* i420_y = i420_tmp.data();
+        uint8_t* i420_u = i420_y + current_width * current_height;
+        uint8_t* i420_v = i420_u + (current_width / 2) * (current_height / 2);
+        libyuv::RAWToI420(src_data, static_cast<int>(current_width * 3), i420_y,
+                          static_cast<int>(current_width), i420_u,
+                          static_cast<int>(current_width / 2), i420_v,
+                          static_cast<int>(current_width / 2),
+                          static_cast<int>(current_width),
+                          static_cast<int>(current_height));
+        libyuv::I420ToNV12(i420_y, static_cast<int>(current_width), i420_u,
+                           static_cast<int>(current_width / 2), i420_v,
+                           static_cast<int>(current_width / 2), dst_y,
+                           static_cast<int>(current_width), dst_uv,
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_width),
+                           static_cast<int>(current_height));
+        break;
+      }
+      default:
+        throw std::runtime_error(
+            "Unsupported pixel format for NV12 conversion");
+    }
+
+    result.y = dst_y;
+    result.uv = dst_uv;
+  }
+
+  return result;
+}
+
+}  // namespace video_scaler
diff --git a/src/bindings/video_scaler.h b/src/bindings/video_scaler.h
new file mode 100644
index 0000000..df09c54
--- /dev/null
+++ b/src/bindings/video_scaler.h
@@ -0,0 +1,65 @@
+// スケーリングヘルパー関数
+// VideoFrame を指定サイズにスケーリングし、I420 または NV12 形式で返す
+
+#pragma once
+
+#include <cstdint>
+#include <vector>
+
+#include "video_frame.h"
+
+namespace video_scaler {
+
+// Result of scaling/converting a frame to I420.
+// Intended for the AOM and VPX encoders.
+struct I420ScaleResult {
+  // Backing storage for the scaled and/or converted planes.
+  // Empty when the original frame's planes are used directly.
+  std::vector<uint8_t> buffer;
+
+  // Output size.
+  uint32_t width;
+  uint32_t height;
+
+  // Pointers to the Y, U and V planes.
+  // When buffer is empty these point into the original frame instead.
+  const uint8_t* y;
+  const uint8_t* u;
+  const uint8_t* v;
+
+  // Strides of the Y, U and V planes.
+  int stride_y;
+  int stride_u;
+  int stride_v;
+};
+
+// Result of scaling/converting a frame to NV12.
+// Intended for the NVENC and Intel VPL encoders.
+struct NV12ScaleResult {
+  // Backing storage for the scaled and/or converted planes.
+  // Empty when the original frame's planes are used directly.
+  std::vector<uint8_t> buffer;
+
+  // Output size.
+  uint32_t width;
+  uint32_t height;
+
+  // Pointers to the Y plane and the interleaved UV plane.
+  // When buffer is empty these point into the original frame instead.
+  const uint8_t* y;
+  const uint8_t* uv;
+};
+
+// Scale and/or convert a frame to I420 at dst_width x dst_height.
+// If no scaling is needed and the input is already I420, the original frame's plane pointers are returned.
+I420ScaleResult scale_to_i420(const VideoFrame& frame,
+                              uint32_t dst_width,
+                              uint32_t dst_height);
+
+// Scale and/or convert a frame to NV12 at dst_width x dst_height.
+// If no scaling is needed and the input is already NV12, the original frame's plane pointers are returned.
+NV12ScaleResult scale_to_nv12(const VideoFrame& frame,
+                              uint32_t dst_width,
+                              uint32_t dst_height);
+
+}  // namespace video_scaler

From 31b404e26a0daa1f7fce2bc0a0d2bf59bbf48abe Mon Sep 17 00:00:00 2001
From: voluntas <nakai@shiguredo.jp>
Date: Tue, 27 Jan 2026 08:50:04 +0900
Subject: [PATCH 04/12] =?UTF-8?q?video=5Fscaler=20=E3=81=AE=E4=B8=8D?=
 =?UTF-8?q?=E8=A6=81=E3=81=AA=E3=83=A1=E3=83=A2=E3=83=AA=E7=A2=BA=E4=BF=9D?=
 =?UTF-8?q?=E3=81=A8=E3=83=87=E3=83=83=E3=83=89=E3=82=B3=E3=83=BC=E3=83=89?=
 =?UTF-8?q?=E3=82=92=E5=89=8A=E9=99=A4=E3=81=99=E3=82=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- scale_to_i420: I420 の場合に result.buffer.resize() を呼ばないように修正
- scale_to_i420: 到達不能なコードパス (needs_scaling == false) を削除
- scale_to_nv12: 到達不能なコードパス (needs_scaling == false) を削除
---
 src/bindings/video_scaler.cpp | 42 ++++++++++++-----------------------
 1 file changed, 14 insertions(+), 28 deletions(-)

diff --git a/src/bindings/video_scaler.cpp b/src/bindings/video_scaler.cpp
index 3004046..6acf0c8 100644
--- a/src/bindings/video_scaler.cpp
+++ b/src/bindings/video_scaler.cpp
@@ -277,27 +277,19 @@ I420ScaleResult scale_to_i420(const VideoFrame& frame,
   }
 
   // 2. I420 に変換
-  result.buffer.resize(y_size + uv_size * 2);
-  uint8_t* dst_y = result.buffer.data();
-  uint8_t* dst_u = dst_y + y_size;
-  uint8_t* dst_v = dst_u + uv_size;
-
   if (current_format == VideoPixelFormat::I420) {
-    if (needs_scaling) {
-      // スケーリング済みの場合はバッファをコピー
-      result.buffer = std::move(scaled_buffer);
-      result.y = result.buffer.data();
-      result.u = result.buffer.data() + y_size;
-      result.v = result.buffer.data() + y_size + uv_size;
-    } else {
-      // スケーリング不要の場合は元フレームのポインタを返す
-      result.buffer.clear();
-      result.y = frame.plane_ptr(0);
-      result.u = frame.plane_ptr(1);
-      result.v = frame.plane_ptr(2);
-    }
+    // スケーリング済みの I420 バッファをそのまま使用
+    result.buffer = std::move(scaled_buffer);
+    result.y = result.buffer.data();
+    result.u = result.buffer.data() + y_size;
+    result.v = result.buffer.data() + y_size + uv_size;
   } else {
     // I420 以外のフォーマットは変換が必要
+    result.buffer.resize(y_size + uv_size * 2);
+    uint8_t* dst_y = result.buffer.data();
+    uint8_t* dst_u = dst_y + y_size;
+    uint8_t* dst_v = dst_u + uv_size;
+
     const uint8_t* src_data =
         needs_scaling ? scaled_buffer.data() : frame.plane_ptr(0);
 
@@ -575,16 +567,10 @@ NV12ScaleResult scale_to_nv12(const VideoFrame& frame,
 
   // 2. NV12 に変換
   if (current_format == VideoPixelFormat::NV12) {
-    if (needs_scaling) {
-      // スケーリング済みの場合はバッファをそのまま使用
-      result.buffer = std::move(scaled_buffer);
-      result.y = result.buffer.data();
-      result.uv = result.buffer.data() + current_width * current_height;
-    } else {
-      // スケーリング不要の場合は元フレームのポインタを返す
-      result.y = frame.plane_ptr(0);
-      result.uv = frame.plane_ptr(1);
-    }
+    // スケーリング済みの NV12 バッファをそのまま使用
+    result.buffer = std::move(scaled_buffer);
+    result.y = result.buffer.data();
+    result.uv = result.buffer.data() + current_width * current_height;
   } else {
     // NV12 以外のフォーマットは変換が必要
     result.buffer.resize(nv12_size);

From c51ad67ab29a74aa332fd6e7126c73250b4a6b42 Mon Sep 17 00:00:00 2001
From: voluntas <nakai@shiguredo.jp>
Date: Tue, 27 Jan 2026 08:53:03 +0900
Subject: [PATCH 05/12] =?UTF-8?q?test=5Fencoder=5Fscaling.py=20=E3=81=AE?=
 =?UTF-8?q?=E3=82=B3=E3=83=A1=E3=83=B3=E3=83=88=E3=82=92=E5=BE=A9=E5=85=83?=
 =?UTF-8?q?=E3=81=99=E3=82=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/test_encoder_scaling.py | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/tests/test_encoder_scaling.py b/tests/test_encoder_scaling.py
index 55d1204..38855a1 100644
--- a/tests/test_encoder_scaling.py
+++ b/tests/test_encoder_scaling.py
@@ -81,7 +81,9 @@ def _make_test_frame(
 @pytest.mark.parametrize("pixel_format", PIXEL_FORMATS)
 def test_av1_encode_with_scaling(pixel_format: VideoPixelFormat):
     """AV1 エンコーダのスケーリング機能テスト (各ピクセルフォーマット)."""
+    # configure: 320x240 (出力解像度)
     output_width, output_height = 320, 240
+    # encode: 640x480 のフレーム (入力解像度)
     input_width, input_height = 640, 480
 
     encoded_chunks = []
@@ -104,15 +106,18 @@ def on_error(error):
     }
     encoder.configure(config)
 
+    # 入力解像度のフレームを作成
     frame = _make_test_frame(input_width, input_height, 0, pixel_format)
     encoder.encode(frame, {"key_frame": True})
     encoder.flush()
     frame.close()
 
+    # エンコードが成功していることを確認
     assert len(encoded_chunks) >= 1
     assert encoded_chunks[0].byte_length > 0
     assert encoded_chunks[0].type == EncodedVideoChunkType.KEY
 
+    # デコードして出力解像度を確認
     decoded_frames = []
 
     def on_decode_output(frame):
@@ -130,6 +135,7 @@ def on_decode_error(error):
         decoder.decode(chunk)
     decoder.flush()
 
+    # デコードされたフレームが出力解像度になっていることを確認
     assert len(decoded_frames) >= 1
     for frame in decoded_frames:
         assert frame.coded_width == output_width
@@ -227,7 +233,9 @@ def on_error(error):
 @pytest.mark.parametrize("pixel_format", PIXEL_FORMATS)
 def test_vp8_encode_with_scaling(pixel_format: VideoPixelFormat):
     """VP8 エンコーダのスケーリング機能テスト (各ピクセルフォーマット)."""
+    # configure: 320x240 (出力解像度)
     output_width, output_height = 320, 240
+    # encode: 640x480 のフレーム (入力解像度)
     input_width, input_height = 640, 480
 
     encoded_chunks = []
@@ -250,15 +258,18 @@ def on_error(error):
     }
     encoder.configure(config)
 
+    # 入力解像度のフレームを作成
     frame = _make_test_frame(input_width, input_height, 0, pixel_format)
     encoder.encode(frame, {"key_frame": True})
     encoder.flush()
     frame.close()
 
+    # エンコードが成功していることを確認
     assert len(encoded_chunks) >= 1
     assert encoded_chunks[0].byte_length > 0
     assert encoded_chunks[0].type == EncodedVideoChunkType.KEY
 
+    # デコードして出力解像度を確認
     decoded_frames = []
 
     def on_decode_output(frame):
@@ -276,6 +287,7 @@ def on_decode_error(error):
         decoder.decode(chunk)
     decoder.flush()
 
+    # デコードされたフレームが出力解像度になっていることを確認
     assert len(decoded_frames) >= 1
     for frame in decoded_frames:
         assert frame.coded_width == output_width
@@ -338,7 +350,9 @@ def on_error(error):
 @pytest.mark.parametrize("pixel_format", PIXEL_FORMATS)
 def test_vp9_encode_with_scaling(pixel_format: VideoPixelFormat):
     """VP9 エンコーダのスケーリング機能テスト (各ピクセルフォーマット)."""
+    # configure: 320x240 (出力解像度)
     output_width, output_height = 320, 240
+    # encode: 640x480 のフレーム (入力解像度)
     input_width, input_height = 640, 480
 
     encoded_chunks = []
@@ -361,15 +375,18 @@ def on_error(error):
     }
     encoder.configure(config)
 
+    # 入力解像度のフレームを作成
     frame = _make_test_frame(input_width, input_height, 0, pixel_format)
     encoder.encode(frame, {"key_frame": True})
     encoder.flush()
     frame.close()
 
+    # エンコードが成功していることを確認
     assert len(encoded_chunks) >= 1
     assert encoded_chunks[0].byte_length > 0
     assert encoded_chunks[0].type == EncodedVideoChunkType.KEY
 
+    # デコードして出力解像度を確認
     decoded_frames = []
 
     def on_decode_output(frame):
@@ -387,6 +404,7 @@ def on_decode_error(error):
         decoder.decode(chunk)
     decoder.flush()
 
+    # デコードされたフレームが出力解像度になっていることを確認
     assert len(decoded_frames) >= 1
     for frame in decoded_frames:
         assert frame.coded_width == output_width

From aaf1efcd662ea75b68c947c0ac8109fccf172657 Mon Sep 17 00:00:00 2001
From: voluntas <nakai@shiguredo.jp>
Date: Tue, 27 Jan 2026 09:01:05 +0900
Subject: [PATCH 06/12] =?UTF-8?q?test=5Fencoder=5Fscaling.py=20=E3=81=AE?=
 =?UTF-8?q?=E3=82=B3=E3=83=A1=E3=83=B3=E3=83=88=E3=82=92=E5=BE=A9=E5=85=83?=
 =?UTF-8?q?=E3=81=99=E3=82=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/test_encoder_scaling.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tests/test_encoder_scaling.py b/tests/test_encoder_scaling.py
index 38855a1..74cc581 100644
--- a/tests/test_encoder_scaling.py
+++ b/tests/test_encoder_scaling.py
@@ -171,11 +171,13 @@ def on_error(error):
     }
     encoder.configure(config)
 
+    # 同じ解像度のフレーム
     frame = _make_test_frame(width, height, 0, pixel_format)
     encoder.encode(frame, {"key_frame": True})
     encoder.flush()
     frame.close()
 
+    # エンコードが成功していることを確認
     assert len(encoded_chunks) >= 1
     assert encoded_chunks[0].byte_length > 0
 
@@ -185,7 +187,9 @@ def on_error(error):
 @pytest.mark.parametrize("pixel_format", PIXEL_FORMATS)
 def test_av1_encode_scaling_multiple_frames(pixel_format: VideoPixelFormat):
     """AV1 複数フレームでのスケーリングテスト (各ピクセルフォーマット)."""
+    # configure: 320x240 (出力解像度)
     output_width, output_height = 320, 240
+    # encode: 640x480 のフレーム (入力解像度)
     input_width, input_height = 640, 480
     num_frames = 3
 
@@ -209,6 +213,7 @@ def on_error(error):
     }
     encoder.configure(config)
 
+    # 入力解像度のフレームを複数作成・エンコード
     for i in range(num_frames):
         frame = _make_test_frame(input_width, input_height, i, pixel_format)
         encoder.encode(frame, {"key_frame": i == 0})
@@ -216,6 +221,7 @@ def on_error(error):
 
     encoder.flush()
 
+    # エンコードが成功していることを確認
     assert len(encoded_chunks) >= num_frames
 
     encoder.close()
@@ -327,11 +333,13 @@ def on_error(error):
     }
     encoder.configure(config)
 
+    # 同じ解像度のフレーム
     frame = _make_test_frame(width, height, 0, pixel_format)
     encoder.encode(frame, {"key_frame": True})
     encoder.flush()
     frame.close()
 
+    # エンコードが成功していることを確認
     assert len(encoded_chunks) >= 1
     assert encoded_chunks[0].byte_length > 0
 
@@ -444,11 +452,13 @@ def on_error(error):
     }
     encoder.configure(config)
 
+    # 同じ解像度のフレーム
     frame = _make_test_frame(width, height, 0, pixel_format)
     encoder.encode(frame, {"key_frame": True})
     encoder.flush()
     frame.close()
 
+    # エンコードが成功していることを確認
     assert len(encoded_chunks) >= 1
     assert encoded_chunks[0].byte_length > 0
 
@@ -462,7 +472,9 @@ def on_error(error):
 @pytest.mark.parametrize("pixel_format", PIXEL_FORMATS)
 def test_vp9_encode_scaling_multiple_frames(pixel_format: VideoPixelFormat):
     """VP9 複数フレームでのスケーリングテスト (各ピクセルフォーマット)."""
+    # configure: 320x240 (出力解像度)
     output_width, output_height = 320, 240
+    # encode: 640x480 のフレーム (入力解像度)
     input_width, input_height = 640, 480
     num_frames = 3
 
@@ -486,6 +498,7 @@ def on_error(error):
     }
     encoder.configure(config)
 
+    # 入力解像度のフレームを複数作成・エンコード
     for i in range(num_frames):
         frame = _make_test_frame(input_width, input_height, i, pixel_format)
         encoder.encode(frame, {"key_frame": i == 0})
@@ -493,6 +506,7 @@ def on_error(error):
 
     encoder.flush()
 
+    # エンコードが成功していることを確認
     assert len(encoded_chunks) >= num_frames
 
     encoder.close()

From 96a9f6e880a12348043df89d74cbde295d8d1bcb Mon Sep 17 00:00:00 2001
From: voluntas <nakai@shiguredo.jp>
Date: Tue, 27 Jan 2026 09:03:12 +0900
Subject: [PATCH 07/12] =?UTF-8?q?PIXEL=5FFORMATS=20=E5=A4=89=E6=95=B0?=
 =?UTF-8?q?=E3=82=92=E5=89=8A=E9=99=A4=E3=81=97=E3=81=A6=20VideoPixelForma?=
 =?UTF-8?q?t=20=E3=82=92=E7=9B=B4=E6=8E=A5=E4=BD=BF=E7=94=A8=E3=81=99?=
 =?UTF-8?q?=E3=82=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/test_encoder_scaling.py | 29 ++++++++---------------------
 1 file changed, 8 insertions(+), 21 deletions(-)

diff --git a/tests/test_encoder_scaling.py b/tests/test_encoder_scaling.py
index 74cc581..4b6f406 100644
--- a/tests/test_encoder_scaling.py
+++ b/tests/test_encoder_scaling.py
@@ -60,25 +60,12 @@ def _make_test_frame(
     return frame
 
 
-# テスト対象のピクセルフォーマット
-PIXEL_FORMATS = [
-    VideoPixelFormat.I420,
-    VideoPixelFormat.I422,
-    VideoPixelFormat.I444,
-    VideoPixelFormat.NV12,
-    VideoPixelFormat.RGBA,
-    VideoPixelFormat.BGRA,
-    VideoPixelFormat.RGB,
-    VideoPixelFormat.BGR,
-]
-
-
 # =============================================================================
 # AV1 スケーリングテスト
 # =============================================================================
 
 
-@pytest.mark.parametrize("pixel_format", PIXEL_FORMATS)
+@pytest.mark.parametrize("pixel_format", VideoPixelFormat)
 def test_av1_encode_with_scaling(pixel_format: VideoPixelFormat):
     """AV1 エンコーダのスケーリング機能テスト (各ピクセルフォーマット)."""
     # configure: 320x240 (出力解像度)
@@ -146,7 +133,7 @@ def on_decode_error(error):
     decoder.close()
 
 
-@pytest.mark.parametrize("pixel_format", PIXEL_FORMATS)
+@pytest.mark.parametrize("pixel_format", VideoPixelFormat)
 def test_av1_encode_scaling_same_resolution(pixel_format: VideoPixelFormat):
     """AV1 configure と同じ解像度のフレームはスケーリングなしでエンコード (各ピクセルフォーマット)."""
     width, height = 320, 240
@@ -184,7 +171,7 @@ def on_error(error):
     encoder.close()
 
 
-@pytest.mark.parametrize("pixel_format", PIXEL_FORMATS)
+@pytest.mark.parametrize("pixel_format", VideoPixelFormat)
 def test_av1_encode_scaling_multiple_frames(pixel_format: VideoPixelFormat):
     """AV1 複数フレームでのスケーリングテスト (各ピクセルフォーマット)."""
     # configure: 320x240 (出力解像度)
@@ -236,7 +223,7 @@ def on_error(error):
     platform.system() not in ("Darwin", "Linux"),
     reason="VP8 は macOS / Linux のみサポート",
 )
-@pytest.mark.parametrize("pixel_format", PIXEL_FORMATS)
+@pytest.mark.parametrize("pixel_format", VideoPixelFormat)
 def test_vp8_encode_with_scaling(pixel_format: VideoPixelFormat):
     """VP8 エンコーダのスケーリング機能テスト (各ピクセルフォーマット)."""
     # configure: 320x240 (出力解像度)
@@ -308,7 +295,7 @@ def on_decode_error(error):
     platform.system() not in ("Darwin", "Linux"),
     reason="VP8 は macOS / Linux のみサポート",
 )
-@pytest.mark.parametrize("pixel_format", PIXEL_FORMATS)
+@pytest.mark.parametrize("pixel_format", VideoPixelFormat)
 def test_vp8_encode_scaling_same_resolution(pixel_format: VideoPixelFormat):
     """VP8 configure と同じ解像度のフレームはスケーリングなしでエンコード (各ピクセルフォーマット)."""
     width, height = 320, 240
@@ -355,7 +342,7 @@ def on_error(error):
     platform.system() not in ("Darwin", "Linux"),
     reason="VP9 は macOS / Linux のみサポート",
 )
-@pytest.mark.parametrize("pixel_format", PIXEL_FORMATS)
+@pytest.mark.parametrize("pixel_format", VideoPixelFormat)
 def test_vp9_encode_with_scaling(pixel_format: VideoPixelFormat):
     """VP9 エンコーダのスケーリング機能テスト (各ピクセルフォーマット)."""
     # configure: 320x240 (出力解像度)
@@ -427,7 +414,7 @@ def on_decode_error(error):
     platform.system() not in ("Darwin", "Linux"),
     reason="VP9 は macOS / Linux のみサポート",
 )
-@pytest.mark.parametrize("pixel_format", PIXEL_FORMATS)
+@pytest.mark.parametrize("pixel_format", VideoPixelFormat)
 def test_vp9_encode_scaling_same_resolution(pixel_format: VideoPixelFormat):
     """VP9 configure と同じ解像度のフレームはスケーリングなしでエンコード (各ピクセルフォーマット)."""
     width, height = 320, 240
@@ -469,7 +456,7 @@ def on_error(error):
     platform.system() not in ("Darwin", "Linux"),
     reason="VP9 は macOS / Linux のみサポート",
 )
-@pytest.mark.parametrize("pixel_format", PIXEL_FORMATS)
+@pytest.mark.parametrize("pixel_format", VideoPixelFormat)
 def test_vp9_encode_scaling_multiple_frames(pixel_format: VideoPixelFormat):
     """VP9 複数フレームでのスケーリングテスト (各ピクセルフォーマット)."""
     # configure: 320x240 (出力解像度)

From f00baff5f5f7e77ae52e49aa60a1dcf6e12f948e Mon Sep 17 00:00:00 2001
From: voluntas <nakai@shiguredo.jp>
Date: Tue, 27 Jan 2026 09:05:56 +0900
Subject: [PATCH 08/12] =?UTF-8?q?test=5Fencoder=5Fscaling.py=20=E3=81=AB?=
 =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E3=83=87=E3=83=BC=E3=82=BF=E3=81=AB?=
 =?UTF-8?q?=E3=81=A4=E3=81=84=E3=81=A6=E3=81=AE=E3=82=B3=E3=83=A1=E3=83=B3?=
 =?UTF-8?q?=E3=83=88=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=99=E3=82=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/test_encoder_scaling.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tests/test_encoder_scaling.py b/tests/test_encoder_scaling.py
index 4b6f406..05dde02 100644
--- a/tests/test_encoder_scaling.py
+++ b/tests/test_encoder_scaling.py
@@ -6,6 +6,12 @@
 - ソフトウェアエンコーダー (AV1, VP8, VP9): libyuv を使用
 - ハードウェアエンコーダー (NVENC, Intel VPL): libyuv を使用
 - Apple Video Toolbox: VTPixelTransferSession を使用 (test_apple_video_toolbox.py)
+
+テストデータについて:
+    このテストでは全てのピクセルフォーマット (I420, I422, I444, NV12, RGBA, BGRA, RGB, BGR)
+    に対してスケーリング機能をテストする。テストフレームのデータは全てゼロ (RGB 系では黒、YUV 系では緑) だが、
+    各フォーマットに応じた正しいサイズで生成される。VideoFrame はサイズと format 指定に
+    基づいてデータを解釈するため、スケーリング機能のテストとしてはサイズが正しければ十分。
 """
 
 import platform

From 8c14b50d00de5fb3464ee0c82bf2f2f2d8b4827a Mon Sep 17 00:00:00 2001
From: voluntas <nakai@shiguredo.jp>
Date: Tue, 27 Jan 2026 09:10:58 +0900
Subject: [PATCH 09/12] =?UTF-8?q?test=5Fencoder=5Fscaling.py=20=E3=82=92?=
 =?UTF-8?q?=20codec=20=E3=81=A8=20pixel=5Fformat=20=E3=81=AE=E4=B8=A1?=
 =?UTF-8?q?=E6=96=B9=E3=81=A7=E3=83=91=E3=83=A9=E3=83=A1=E3=82=BF=E3=83=A9?=
 =?UTF-8?q?=E3=82=A4=E3=82=BA=E3=81=99=E3=82=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

8 つの関数を 3 つに統合:
- test_encode_with_scaling (AV1, VP8, VP9)
- test_encode_scaling_same_resolution (AV1, VP8, VP9)
- test_encode_scaling_multiple_frames (AV1, VP9)
---
 tests/test_encoder_scaling.py | 359 +++++++---------------------------
 1 file changed, 66 insertions(+), 293 deletions(-)

diff --git a/tests/test_encoder_scaling.py b/tests/test_encoder_scaling.py
index 05dde02..1bb370c 100644
--- a/tests/test_encoder_scaling.py
+++ b/tests/test_encoder_scaling.py
@@ -67,290 +67,35 @@ def _make_test_frame(
 
 
 # =============================================================================
-# AV1 スケーリングテスト
+# スケーリングテスト (全コーデック共通)
 # =============================================================================
 
 
-@pytest.mark.parametrize("pixel_format", VideoPixelFormat)
-def test_av1_encode_with_scaling(pixel_format: VideoPixelFormat):
-    """AV1 エンコーダのスケーリング機能テスト (各ピクセルフォーマット)."""
-    # configure: 320x240 (出力解像度)
-    output_width, output_height = 320, 240
-    # encode: 640x480 のフレーム (入力解像度)
-    input_width, input_height = 640, 480
-
-    encoded_chunks = []
-
-    def on_output(chunk):
-        encoded_chunks.append(chunk)
-
-    def on_error(error):
-        pytest.fail(f"Encoder error: {error}")
-
-    encoder = VideoEncoder(on_output, on_error)
-
-    config: VideoEncoderConfig = {
-        "codec": "av01.0.04M.08",
-        "width": output_width,
-        "height": output_height,
-        "bitrate": 500_000,
-        "framerate": 30.0,
-        "latency_mode": LatencyMode.REALTIME,
-    }
-    encoder.configure(config)
-
-    # 入力解像度のフレームを作成
-    frame = _make_test_frame(input_width, input_height, 0, pixel_format)
-    encoder.encode(frame, {"key_frame": True})
-    encoder.flush()
-    frame.close()
-
-    # エンコードが成功していることを確認
-    assert len(encoded_chunks) >= 1
-    assert encoded_chunks[0].byte_length > 0
-    assert encoded_chunks[0].type == EncodedVideoChunkType.KEY
-
-    # デコードして出力解像度を確認
-    decoded_frames = []
-
-    def on_decode_output(frame):
-        decoded_frames.append(frame)
-
-    def on_decode_error(error):
-        pytest.fail(f"Decoder error: {error}")
-
-    decoder = VideoDecoder(on_decode_output, on_decode_error)
-
-    decoder_config: VideoDecoderConfig = {"codec": "av01.0.04M.08"}
-    decoder.configure(decoder_config)
-
-    for chunk in encoded_chunks:
-        decoder.decode(chunk)
-    decoder.flush()
-
-    # デコードされたフレームが出力解像度になっていることを確認
-    assert len(decoded_frames) >= 1
-    for frame in decoded_frames:
-        assert frame.coded_width == output_width
-        assert frame.coded_height == output_height
-        frame.close()
-
-    encoder.close()
-    decoder.close()
-
-
-@pytest.mark.parametrize("pixel_format", VideoPixelFormat)
-def test_av1_encode_scaling_same_resolution(pixel_format: VideoPixelFormat):
-    """AV1 configure と同じ解像度のフレームはスケーリングなしでエンコード (各ピクセルフォーマット)."""
-    width, height = 320, 240
-
-    encoded_chunks = []
-
-    def on_output(chunk):
-        encoded_chunks.append(chunk)
-
-    def on_error(error):
-        pytest.fail(f"Encoder error: {error}")
-
-    encoder = VideoEncoder(on_output, on_error)
-
-    config: VideoEncoderConfig = {
-        "codec": "av01.0.04M.08",
-        "width": width,
-        "height": height,
-        "bitrate": 500_000,
-        "framerate": 30.0,
-        "latency_mode": LatencyMode.REALTIME,
-    }
-    encoder.configure(config)
-
-    # 同じ解像度のフレーム
-    frame = _make_test_frame(width, height, 0, pixel_format)
-    encoder.encode(frame, {"key_frame": True})
-    encoder.flush()
-    frame.close()
-
-    # エンコードが成功していることを確認
-    assert len(encoded_chunks) >= 1
-    assert encoded_chunks[0].byte_length > 0
-
-    encoder.close()
-
-
-@pytest.mark.parametrize("pixel_format", VideoPixelFormat)
-def test_av1_encode_scaling_multiple_frames(pixel_format: VideoPixelFormat):
-    """AV1 複数フレームでのスケーリングテスト (各ピクセルフォーマット)."""
-    # configure: 320x240 (出力解像度)
-    output_width, output_height = 320, 240
-    # encode: 640x480 のフレーム (入力解像度)
-    input_width, input_height = 640, 480
-    num_frames = 3
-
-    encoded_chunks = []
-
-    def on_output(chunk):
-        encoded_chunks.append(chunk)
-
-    def on_error(error):
-        pytest.fail(f"Encoder error: {error}")
-
-    encoder = VideoEncoder(on_output, on_error)
-
-    config: VideoEncoderConfig = {
-        "codec": "av01.0.04M.08",
-        "width": output_width,
-        "height": output_height,
-        "bitrate": 500_000,
-        "framerate": 30.0,
-        "latency_mode": LatencyMode.REALTIME,
-    }
-    encoder.configure(config)
-
-    # 入力解像度のフレームを複数作成・エンコード
-    for i in range(num_frames):
-        frame = _make_test_frame(input_width, input_height, i, pixel_format)
-        encoder.encode(frame, {"key_frame": i == 0})
-        frame.close()
-
-    encoder.flush()
-
-    # エンコードが成功していることを確認
-    assert len(encoded_chunks) >= num_frames
-
-    encoder.close()
-
-
-# =============================================================================
-# VP8 スケーリングテスト
-# =============================================================================
-
-
-@pytest.mark.skipif(
-    platform.system() not in ("Darwin", "Linux"),
-    reason="VP8 は macOS / Linux のみサポート",
-)
-@pytest.mark.parametrize("pixel_format", VideoPixelFormat)
-def test_vp8_encode_with_scaling(pixel_format: VideoPixelFormat):
-    """VP8 エンコーダのスケーリング機能テスト (各ピクセルフォーマット)."""
-    # configure: 320x240 (出力解像度)
-    output_width, output_height = 320, 240
-    # encode: 640x480 のフレーム (入力解像度)
-    input_width, input_height = 640, 480
-
-    encoded_chunks = []
-
-    def on_output(chunk):
-        encoded_chunks.append(chunk)
-
-    def on_error(error):
-        pytest.fail(f"Encoder error: {error}")
-
-    encoder = VideoEncoder(on_output, on_error)
-
-    config: VideoEncoderConfig = {
-        "codec": "vp8",
-        "width": output_width,
-        "height": output_height,
-        "bitrate": 500_000,
-        "framerate": 30.0,
-        "latency_mode": LatencyMode.REALTIME,
-    }
-    encoder.configure(config)
-
-    # 入力解像度のフレームを作成
-    frame = _make_test_frame(input_width, input_height, 0, pixel_format)
-    encoder.encode(frame, {"key_frame": True})
-    encoder.flush()
-    frame.close()
-
-    # エンコードが成功していることを確認
-    assert len(encoded_chunks) >= 1
-    assert encoded_chunks[0].byte_length > 0
-    assert encoded_chunks[0].type == EncodedVideoChunkType.KEY
-
-    # デコードして出力解像度を確認
-    decoded_frames = []
-
-    def on_decode_output(frame):
-        decoded_frames.append(frame)
-
-    def on_decode_error(error):
-        pytest.fail(f"Decoder error: {error}")
-
-    decoder = VideoDecoder(on_decode_output, on_decode_error)
-
-    decoder_config: VideoDecoderConfig = {"codec": "vp8"}
-    decoder.configure(decoder_config)
-
-    for chunk in encoded_chunks:
-        decoder.decode(chunk)
-    decoder.flush()
-
-    # デコードされたフレームが出力解像度になっていることを確認
-    assert len(decoded_frames) >= 1
-    for frame in decoded_frames:
-        assert frame.coded_width == output_width
-        assert frame.coded_height == output_height
-        frame.close()
-
-    encoder.close()
-    decoder.close()
-
-
-@pytest.mark.skipif(
-    platform.system() not in ("Darwin", "Linux"),
-    reason="VP8 は macOS / Linux のみサポート",
-)
-@pytest.mark.parametrize("pixel_format", VideoPixelFormat)
-def test_vp8_encode_scaling_same_resolution(pixel_format: VideoPixelFormat):
-    """VP8 configure と同じ解像度のフレームはスケーリングなしでエンコード (各ピクセルフォーマット)."""
-    width, height = 320, 240
-
-    encoded_chunks = []
-
-    def on_output(chunk):
-        encoded_chunks.append(chunk)
-
-    def on_error(error):
-        pytest.fail(f"Encoder error: {error}")
-
-    encoder = VideoEncoder(on_output, on_error)
-
-    config: VideoEncoderConfig = {
-        "codec": "vp8",
-        "width": width,
-        "height": height,
-        "bitrate": 500_000,
-        "framerate": 30.0,
-        "latency_mode": LatencyMode.REALTIME,
-    }
-    encoder.configure(config)
-
-    # 同じ解像度のフレーム
-    frame = _make_test_frame(width, height, 0, pixel_format)
-    encoder.encode(frame, {"key_frame": True})
-    encoder.flush()
-    frame.close()
-
-    # エンコードが成功していることを確認
-    assert len(encoded_chunks) >= 1
-    assert encoded_chunks[0].byte_length > 0
-
-    encoder.close()
-
-
-# =============================================================================
-# VP9 スケーリングテスト
-# =============================================================================
-
-
-@pytest.mark.skipif(
-    platform.system() not in ("Darwin", "Linux"),
-    reason="VP9 は macOS / Linux のみサポート",
+@pytest.mark.parametrize(
+    "codec",
+    [
+        pytest.param("av01.0.04M.08", id="AV1"),
+        pytest.param(
+            "vp8",
+            marks=pytest.mark.skipif(
+                platform.system() not in ("Darwin", "Linux"),
+                reason="VP8 は macOS / Linux のみサポート",
+            ),
+            id="VP8",
+        ),
+        pytest.param(
+            "vp09.00.10.08",
+            marks=pytest.mark.skipif(
+                platform.system() not in ("Darwin", "Linux"),
+                reason="VP9 は macOS / Linux のみサポート",
+            ),
+            id="VP9",
+        ),
+    ],
 )
 @pytest.mark.parametrize("pixel_format", VideoPixelFormat)
-def test_vp9_encode_with_scaling(pixel_format: VideoPixelFormat):
-    """VP9 エンコーダのスケーリング機能テスト (各ピクセルフォーマット)."""
+def test_encode_with_scaling(codec: str, pixel_format: VideoPixelFormat):
+    """エンコーダのスケーリング機能テスト (各コーデック・各ピクセルフォーマット)."""
     # configure: 320x240 (出力解像度)
     output_width, output_height = 320, 240
     # encode: 640x480 のフレーム (入力解像度)
@@ -367,7 +112,7 @@ def on_error(error):
     encoder = VideoEncoder(on_output, on_error)
 
     config: VideoEncoderConfig = {
-        "codec": "vp09.00.10.08",
+        "codec": codec,
         "width": output_width,
         "height": output_height,
         "bitrate": 500_000,
@@ -398,7 +143,7 @@ def on_decode_error(error):
 
     decoder = VideoDecoder(on_decode_output, on_decode_error)
 
-    decoder_config: VideoDecoderConfig = {"codec": "vp09.00.10.08"}
+    decoder_config: VideoDecoderConfig = {"codec": codec}
     decoder.configure(decoder_config)
 
     for chunk in encoded_chunks:
@@ -416,13 +161,31 @@ def on_decode_error(error):
     decoder.close()
 
 
-@pytest.mark.skipif(
-    platform.system() not in ("Darwin", "Linux"),
-    reason="VP9 は macOS / Linux のみサポート",
+@pytest.mark.parametrize(
+    "codec",
+    [
+        pytest.param("av01.0.04M.08", id="AV1"),
+        pytest.param(
+            "vp8",
+            marks=pytest.mark.skipif(
+                platform.system() not in ("Darwin", "Linux"),
+                reason="VP8 は macOS / Linux のみサポート",
+            ),
+            id="VP8",
+        ),
+        pytest.param(
+            "vp09.00.10.08",
+            marks=pytest.mark.skipif(
+                platform.system() not in ("Darwin", "Linux"),
+                reason="VP9 は macOS / Linux のみサポート",
+            ),
+            id="VP9",
+        ),
+    ],
 )
 @pytest.mark.parametrize("pixel_format", VideoPixelFormat)
-def test_vp9_encode_scaling_same_resolution(pixel_format: VideoPixelFormat):
-    """VP9 configure と同じ解像度のフレームはスケーリングなしでエンコード (各ピクセルフォーマット)."""
+def test_encode_scaling_same_resolution(codec: str, pixel_format: VideoPixelFormat):
+    """configure と同じ解像度のフレームはスケーリングなしでエンコード (各コーデック・各ピクセルフォーマット)."""
     width, height = 320, 240
 
     encoded_chunks = []
@@ -436,7 +199,7 @@ def on_error(error):
     encoder = VideoEncoder(on_output, on_error)
 
     config: VideoEncoderConfig = {
-        "codec": "vp09.00.10.08",
+        "codec": codec,
         "width": width,
         "height": height,
         "bitrate": 500_000,
@@ -458,13 +221,23 @@ def on_error(error):
     encoder.close()
 
 
-@pytest.mark.skipif(
-    platform.system() not in ("Darwin", "Linux"),
-    reason="VP9 は macOS / Linux のみサポート",
+@pytest.mark.parametrize(
+    "codec",
+    [
+        pytest.param("av01.0.04M.08", id="AV1"),
+        pytest.param(
+            "vp09.00.10.08",
+            marks=pytest.mark.skipif(
+                platform.system() not in ("Darwin", "Linux"),
+                reason="VP9 は macOS / Linux のみサポート",
+            ),
+            id="VP9",
+        ),
+    ],
 )
 @pytest.mark.parametrize("pixel_format", VideoPixelFormat)
-def test_vp9_encode_scaling_multiple_frames(pixel_format: VideoPixelFormat):
-    """VP9 複数フレームでのスケーリングテスト (各ピクセルフォーマット)."""
+def test_encode_scaling_multiple_frames(codec: str, pixel_format: VideoPixelFormat):
+    """複数フレームでのスケーリングテスト (各コーデック・各ピクセルフォーマット)."""
     # configure: 320x240 (出力解像度)
     output_width, output_height = 320, 240
     # encode: 640x480 のフレーム (入力解像度)
@@ -482,7 +255,7 @@ def on_error(error):
     encoder = VideoEncoder(on_output, on_error)
 
     config: VideoEncoderConfig = {
-        "codec": "vp09.00.10.08",
+        "codec": codec,
         "width": output_width,
         "height": output_height,
         "bitrate": 500_000,

From 9fc8a7fd906d9c9b284cb872aaefc99cb4ed94b4 Mon Sep 17 00:00:00 2001
From: voluntas <nakai@shiguredo.jp>
Date: Tue, 27 Jan 2026 09:21:02 +0900
Subject: [PATCH 10/12] =?UTF-8?q?Apple=20Video=20Toolbox=20=E3=81=AE=20CVP?=
 =?UTF-8?q?ixelBuffer=20=E3=82=B3=E3=83=94=E3=83=BC=E3=82=92=20libyuv::Cop?=
 =?UTF-8?q?yPlane=20=E3=81=AB=E7=BD=AE=E3=81=8D=E6=8F=9B=E3=81=88=E3=82=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 .../video_encoder_apple_video_toolbox.cpp     | 78 +++++++++----------
 1 file changed, 35 insertions(+), 43 deletions(-)

diff --git a/src/bindings/video_encoder_apple_video_toolbox.cpp b/src/bindings/video_encoder_apple_video_toolbox.cpp
index ad6495b..a4f8b45 100644
--- a/src/bindings/video_encoder_apple_video_toolbox.cpp
+++ b/src/bindings/video_encoder_apple_video_toolbox.cpp
@@ -4,12 +4,13 @@
 #include <CoreFoundation/CoreFoundation.h>
 #include <CoreVideo/CoreVideo.h>
 #include <VideoToolbox/VideoToolbox.h>
+#include <libyuv.h>
 #include <nanobind/nanobind.h>
 #include <memory>
 #include <vector>
 
 #include "encoded_video_chunk.h"
-#include "video_frame.h"  // VideoFrame の完全な定義が必要
+#include "video_frame.h"
 
 namespace nb = nanobind;
 
@@ -513,72 +514,63 @@ void VideoEncoder::encode_frame_videotoolbox(
         int chroma_height = (height + 1) / 2;
 
         // Y plane
-        uint8_t* dst_y = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(pb, 0);
-        size_t dst_stride_y = CVPixelBufferGetBytesPerRowOfPlane(pb, 0);
-        const uint8_t* src_y = src_frame->plane_ptr(0);
-        for (int i = 0; i < height; ++i) {
-          memcpy(dst_y + i * dst_stride_y, src_y + i * width, width);
-        }
+        libyuv::CopyPlane(
+            src_frame->plane_ptr(0), width,
+            (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(pb, 0),
+            static_cast<int>(CVPixelBufferGetBytesPerRowOfPlane(pb, 0)), width,
+            height);
 
         // U plane
-        uint8_t* dst_u = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(pb, 1);
-        size_t dst_stride_u = CVPixelBufferGetBytesPerRowOfPlane(pb, 1);
-        const uint8_t* src_u = src_frame->plane_ptr(1);
-        for (int i = 0; i < chroma_height; ++i) {
-          memcpy(dst_u + i * dst_stride_u, src_u + i * chroma_width,
-                 chroma_width);
-        }
+        libyuv::CopyPlane(
+            src_frame->plane_ptr(1), chroma_width,
+            (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(pb, 1),
+            static_cast<int>(CVPixelBufferGetBytesPerRowOfPlane(pb, 1)),
+            chroma_width, chroma_height);
 
         // V plane
-        uint8_t* dst_v = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(pb, 2);
-        size_t dst_stride_v = CVPixelBufferGetBytesPerRowOfPlane(pb, 2);
-        const uint8_t* src_v = src_frame->plane_ptr(2);
-        for (int i = 0; i < chroma_height; ++i) {
-          memcpy(dst_v + i * dst_stride_v, src_v + i * chroma_width,
-                 chroma_width);
-        }
+        libyuv::CopyPlane(
+            src_frame->plane_ptr(2), chroma_width,
+            (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(pb, 2),
+            static_cast<int>(CVPixelBufferGetBytesPerRowOfPlane(pb, 2)),
+            chroma_width, chroma_height);
         break;
       }
 
       case VideoPixelFormat::BGRA: {
         // BGRA: 単一プレーン
-        uint8_t* dst = (uint8_t*)CVPixelBufferGetBaseAddress(pb);
-        size_t dst_stride = CVPixelBufferGetBytesPerRow(pb);
-        const uint8_t* src = src_frame->plane_ptr(0);
         int width = static_cast<int>(src_frame->width());
         int height = static_cast<int>(src_frame->height());
-        size_t row_bytes = width * 4;
+        int row_bytes = width * 4;
 
-        for (int i = 0; i < height; ++i) {
-          memcpy(dst + i * dst_stride, src + i * row_bytes, row_bytes);
-        }
+        libyuv::CopyPlane(
+            src_frame->plane_ptr(0), row_bytes,
+            (uint8_t*)CVPixelBufferGetBaseAddress(pb),
+            static_cast<int>(CVPixelBufferGetBytesPerRow(pb)), row_bytes,
+            height);
         break;
       }
 
       case VideoPixelFormat::NV12:
       default: {
         // NV12: 2 プレーン (Y, UV)
-        uint8_t* dst_y = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(pb, 0);
-        size_t dst_stride_y = CVPixelBufferGetBytesPerRowOfPlane(pb, 0);
-        uint8_t* dst_uv = (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(pb, 1);
-        size_t dst_stride_uv = CVPixelBufferGetBytesPerRowOfPlane(pb, 1);
-
-        const uint8_t* src_y = src_frame->plane_ptr(0);
-        const uint8_t* src_uv = src_frame->plane_ptr(1);
         int width = static_cast<int>(src_frame->width());
         int height = static_cast<int>(src_frame->height());
         int chroma_height = (height + 1) / 2;
+        int chroma_row_bytes = ((width + 1) / 2) * 2;
 
         // Y plane
-        for (int i = 0; i < height; ++i) {
-          memcpy(dst_y + i * dst_stride_y, src_y + i * width, width);
-        }
+        libyuv::CopyPlane(
+            src_frame->plane_ptr(0), width,
+            (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(pb, 0),
+            static_cast<int>(CVPixelBufferGetBytesPerRowOfPlane(pb, 0)), width,
+            height);
+
         // UV plane (interleaved)
-        int chroma_row_bytes = ((width + 1) / 2) * 2;
-        for (int i = 0; i < chroma_height; ++i) {
-          memcpy(dst_uv + i * dst_stride_uv, src_uv + i * chroma_row_bytes,
-                 chroma_row_bytes);
-        }
+        libyuv::CopyPlane(
+            src_frame->plane_ptr(1), chroma_row_bytes,
+            (uint8_t*)CVPixelBufferGetBaseAddressOfPlane(pb, 1),
+            static_cast<int>(CVPixelBufferGetBytesPerRowOfPlane(pb, 1)),
+            chroma_row_bytes, chroma_height);
         break;
       }
     }

From 3dd8e2598df73dcbc6f667a0cf0ac91d516ffa66 Mon Sep 17 00:00:00 2001
From: voluntas <nakai@shiguredo.jp>
Date: Tue, 27 Jan 2026 09:21:10 +0900
Subject: [PATCH 11/12] =?UTF-8?q?test=5Fapple=5Fvideo=5Ftoolbox.py=20?=
 =?UTF-8?q?=E3=81=AE=E3=82=B9=E3=82=B1=E3=83=BC=E3=83=AA=E3=83=B3=E3=82=B0?=
 =?UTF-8?q?=E3=83=86=E3=82=B9=E3=83=88=E3=82=92=20codec=20=E3=81=A8=20pixe?=
 =?UTF-8?q?l=5Fformat=20=E3=81=A7=E3=83=91=E3=83=A9=E3=83=A1=E3=82=BF?=
 =?UTF-8?q?=E3=83=A9=E3=82=A4=E3=82=BA=E3=81=99=E3=82=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

4 つの個別テストを 2 つのパラメタライズされたテストに統合:
- test_encode_with_scaling (H264, HEVC × I420, NV12, BGRA)
- test_encode_scaling_same_resolution (H264, HEVC × I420, NV12)
---
 tests/test_apple_video_toolbox.py | 327 ++++++++++--------------------
 1 file changed, 102 insertions(+), 225 deletions(-)

diff --git a/tests/test_apple_video_toolbox.py b/tests/test_apple_video_toolbox.py
index a889402..d090ca4 100644
--- a/tests/test_apple_video_toolbox.py
+++ b/tests/test_apple_video_toolbox.py
@@ -1890,115 +1890,73 @@ def on_decode_error(error):
 
 # =============================================================================
 # スケーリングテスト (VTPixelTransferSession)
+#
+# WebCodecs API 仕様: 「The encoder MUST scale any VideoFrame whose
+# visible width differs from the configured width value」
+#
+# Apple Video Toolbox がサポートするピクセルフォーマット: I420, NV12, BGRA
 # =============================================================================
 
-
-def test_h264_encode_with_scaling():
-    """configure と異なる解像度のフレームを H.264 でエンコードする (スケーリング).
-
-    WebCodecs API 仕様: 「The encoder MUST scale any VideoFrame whose
-    visible width differs from the configured width value」
-    """
-    encoded_chunks = []
-
-    def on_output(chunk):
-        encoded_chunks.append(chunk)
-
-    def on_error(error):
-        pytest.fail(f"Encoder error: {error}")
-
-    encoder = VideoEncoder(on_output, on_error)
-
-    # configure: 640x360 (出力解像度)
-    config: VideoEncoderConfig = {
-        "codec": "avc1.42E01E",
-        "width": 640,
-        "height": 360,
-        "bitrate": 1_000_000,
-        "framerate": 30,
-        "latency_mode": LatencyMode.REALTIME,
-        "hardware_acceleration_engine": HardwareAccelerationEngine.APPLE_VIDEO_TOOLBOX,
-        "avc": {"format": "annexb"},
-    }
-
-    encoder.configure(config)
-
-    # encode: 1280x720 のフレーム (入力解像度)
-    input_width, input_height = 1280, 720
-    data_size = input_width * input_height * 3 // 2  # I420
-    test_frames = []
-
-    for i in range(5):
-        data = np.full(data_size, (i * 50) % 256, dtype=np.uint8)
-        init: VideoFrameBufferInit = {
-            "format": VideoPixelFormat.I420,
-            "coded_width": input_width,
-            "coded_height": input_height,
-            "timestamp": i * 33333,
-        }
-        frame = VideoFrame(data, init)
-        test_frames.append(frame)
-        encoder.encode(frame, {"key_frame": i == 0})
-
-    encoder.flush()
-
-    # エンコードが成功していることを確認
-    assert len(encoded_chunks) >= 5, (
-        f"5 フレームがエンコードされるべき、実際: {len(encoded_chunks)}"
-    )
-
-    # 最初のチャンクがキーフレームであることを確認
-    assert encoded_chunks[0].type == EncodedVideoChunkType.KEY
-
-    # デコードして出力解像度を確認
-    decoded_frames = []
-
-    def on_decode_output(frame):
-        decoded_frames.append(frame)
-
-    def on_decode_error(error):
-        pytest.fail(f"Decoder error: {error}")
-
-    decoder = VideoDecoder(on_decode_output, on_decode_error)
-
-    decoder_config: VideoDecoderConfig = {
-        "codec": "avc1.42E01E",
-        "coded_width": 640,
-        "coded_height": 360,
+# スケーリングテスト用のピクセルフォーマット (Apple Video Toolbox がサポートするもののみ)
+SCALING_PIXEL_FORMATS = [
+    VideoPixelFormat.I420,
+    VideoPixelFormat.NV12,
+    VideoPixelFormat.BGRA,
+]
+
+# 同一解像度テスト用のピクセルフォーマット (BGRA はスケーリング時のみサポート)
+SAME_RESOLUTION_PIXEL_FORMATS = [
+    VideoPixelFormat.I420,
+    VideoPixelFormat.NV12,
+]
+
+
+def _calculate_scaling_frame_data_size(
+    width: int, height: int, pixel_format: VideoPixelFormat
+) -> int:
+    """スケーリングテスト用のピクセルフォーマットに応じたデータサイズを計算する."""
+    match pixel_format:
+        case VideoPixelFormat.I420 | VideoPixelFormat.NV12:
+            return width * height * 3 // 2
+        case VideoPixelFormat.BGRA:
+            return width * height * 4
+        case _:
+            raise ValueError(f"Unsupported pixel format: {pixel_format}")
+
+
+def _make_scaling_test_frame(
+    width: int,
+    height: int,
+    frame_num: int = 0,
+    pixel_format: VideoPixelFormat = VideoPixelFormat.I420,
+) -> VideoFrame:
+    """スケーリングテスト用の VideoFrame を作成する."""
+    data_size = _calculate_scaling_frame_data_size(width, height, pixel_format)
+    data = np.zeros(data_size, dtype=np.uint8)
+    init: VideoFrameBufferInit = {
+        "format": pixel_format,
+        "coded_width": width,
+        "coded_height": height,
+        "timestamp": frame_num * 33333,
     }
-    decoder.configure(decoder_config)
-
-    for chunk in encoded_chunks:
-        decoder.decode(chunk)
-
-    decoder.flush()
-
-    # デコードされたフレームが出力解像度になっていることを確認
-    assert len(decoded_frames) >= 1
-    for frame in decoded_frames:
-        assert frame.coded_width == 640, (
-            f"出力幅が期待値と異なる: 期待値 640, 実際 {frame.coded_width}"
-        )
-        assert frame.coded_height == 360, (
-            f"出力高さが期待値と異なる: 期待値 360, 実際 {frame.coded_height}"
-        )
-
-    print(
-        f"スケーリングテスト成功: 入力 {input_width}x{input_height} -> "
-        f"出力 640x360, エンコードチャンク数: {len(encoded_chunks)}"
-    )
+    return VideoFrame(data, init)
 
-    # クリーンアップ
-    for frame in test_frames:
-        frame.close()
-    for frame in decoded_frames:
-        frame.close()
-    encoder.close()
-    decoder.close()
 
+@pytest.mark.parametrize(
+    "codec",
+    [
+        pytest.param("avc1.42E01E", id="H264"),
+        pytest.param("hvc1.1.6.L93.B0", id="HEVC"),
+    ],
+)
+@pytest.mark.parametrize("pixel_format", SCALING_PIXEL_FORMATS)
+def test_encode_with_scaling(codec: str, pixel_format: VideoPixelFormat):
+    """エンコーダのスケーリング機能テスト (各コーデック・各ピクセルフォーマット)."""
+    # configure: 320x240 (出力解像度)
+    output_width, output_height = 320, 240
+    # encode: 640x480 のフレーム (入力解像度)
+    input_width, input_height = 640, 480
 
-def test_hevc_encode_with_scaling():
-    """configure と異なる解像度のフレームを HEVC でエンコードする (スケーリング)."""
     encoded_chunks = []
 
     def on_output(chunk):
@@ -2009,43 +1967,32 @@ def on_error(error):
 
     encoder = VideoEncoder(on_output, on_error)
 
-    # configure: 640x480 (出力解像度)
+    # エンコーダー設定 (avc / hevc のビットストリームフォーマットは下でコーデックに応じて追加する)
     config: VideoEncoderConfig = {
-        "codec": "hvc1.1.6.L93.B0",
-        "width": 640,
-        "height": 480,
-        "bitrate": 1_000_000,
+        "codec": codec,
+        "width": output_width,
+        "height": output_height,
+        "bitrate": 500_000,
         "framerate": 30,
         "latency_mode": LatencyMode.REALTIME,
         "hardware_acceleration_engine": HardwareAccelerationEngine.APPLE_VIDEO_TOOLBOX,
-        "hevc": {"format": "annexb"},
     }
-
+    if codec.startswith("avc"):
+        config["avc"] = {"format": "annexb"}
+    elif codec.startswith("hvc"):
+        config["hevc"] = {"format": "annexb"}
     encoder.configure(config)
 
-    # encode: 1920x1080 のフレーム (入力解像度)
-    input_width, input_height = 1920, 1080
-    data_size = input_width * input_height * 3 // 2  # I420
-    test_frames = []
-
-    for i in range(3):
-        data = np.full(data_size, (i * 80) % 256, dtype=np.uint8)
-        init: VideoFrameBufferInit = {
-            "format": VideoPixelFormat.I420,
-            "coded_width": input_width,
-            "coded_height": input_height,
-            "timestamp": i * 33333,
-        }
-        frame = VideoFrame(data, init)
-        test_frames.append(frame)
-        encoder.encode(frame, {"key_frame": i == 0})
-
+    # 入力解像度のフレームを作成
+    frame = _make_scaling_test_frame(input_width, input_height, 0, pixel_format)
+    encoder.encode(frame, {"key_frame": True})
     encoder.flush()
+    frame.close()
 
     # エンコードが成功していることを確認
-    assert len(encoded_chunks) >= 3, (
-        f"3 フレームがエンコードされるべき、実際: {len(encoded_chunks)}"
-    )
+    assert len(encoded_chunks) >= 1
+    assert encoded_chunks[0].byte_length > 0
+    assert encoded_chunks[0].type == EncodedVideoChunkType.KEY
 
     # デコードして出力解像度を確認
     decoded_frames = []
@@ -2059,94 +2006,39 @@ def on_decode_error(error):
     decoder = VideoDecoder(on_decode_output, on_decode_error)
 
     decoder_config: VideoDecoderConfig = {
-        "codec": "hvc1.1.6.L93.B0",
-        "coded_width": 640,
-        "coded_height": 480,
+        "codec": codec,
+        "coded_width": output_width,
+        "coded_height": output_height,
     }
     decoder.configure(decoder_config)
 
     for chunk in encoded_chunks:
         decoder.decode(chunk)
-
     decoder.flush()
 
     # デコードされたフレームが出力解像度になっていることを確認
     assert len(decoded_frames) >= 1
     for frame in decoded_frames:
-        assert frame.coded_width == 640, (
-            f"出力幅が期待値と異なる: 期待値 640, 実際 {frame.coded_width}"
-        )
-        assert frame.coded_height == 480, (
-            f"出力高さが期待値と異なる: 期待値 480, 実際 {frame.coded_height}"
-        )
-
-    print(
-        f"HEVC スケーリングテスト成功: 入力 {input_width}x{input_height} -> "
-        f"出力 640x480, エンコードチャンク数: {len(encoded_chunks)}"
-    )
-
-    # クリーンアップ
-    for frame in test_frames:
-        frame.close()
-    for frame in decoded_frames:
+        assert frame.coded_width == output_width
+        assert frame.coded_height == output_height
         frame.close()
+
     encoder.close()
     decoder.close()
 
 
-def test_scaling_with_nv12_input():
-    """NV12 形式の入力フレームでスケーリングが動作することを確認."""
-    encoded_chunks = []
-
-    def on_output(chunk):
-        encoded_chunks.append(chunk)
-
-    def on_error(error):
-        pytest.fail(f"Encoder error: {error}")
-
-    encoder = VideoEncoder(on_output, on_error)
-
-    # configure: 320x240 (出力解像度)
-    config: VideoEncoderConfig = {
-        "codec": "avc1.42E01E",
-        "width": 320,
-        "height": 240,
-        "bitrate": 500_000,
-        "framerate": 30,
-        "latency_mode": LatencyMode.REALTIME,
-        "hardware_acceleration_engine": HardwareAccelerationEngine.APPLE_VIDEO_TOOLBOX,
-    }
-
-    encoder.configure(config)
-
-    # encode: 640x480 の NV12 フレーム (入力解像度)
-    input_width, input_height = 640, 480
-    data_size = input_width * input_height * 3 // 2  # NV12
-
-    data = np.zeros(data_size, dtype=np.uint8)
-    init: VideoFrameBufferInit = {
-        "format": VideoPixelFormat.NV12,
-        "coded_width": input_width,
-        "coded_height": input_height,
-        "timestamp": 0,
-    }
-    frame = VideoFrame(data, init)
-    encoder.encode(frame, {"key_frame": True})
-
-    encoder.flush()
-
-    # エンコードが成功していることを確認
-    assert len(encoded_chunks) >= 1, "NV12 スケーリングエンコードに失敗"
-
-    print(f"NV12 スケーリングテスト成功: 入力 {input_width}x{input_height} -> 出力 320x240")
-
-    # クリーンアップ
-    frame.close()
-    encoder.close()
-
+@pytest.mark.parametrize(
+    "codec",
+    [
+        pytest.param("avc1.42E01E", id="H264"),
+        pytest.param("hvc1.1.6.L93.B0", id="HEVC"),
+    ],
+)
+@pytest.mark.parametrize("pixel_format", SAME_RESOLUTION_PIXEL_FORMATS)
+def test_encode_scaling_same_resolution(codec: str, pixel_format: VideoPixelFormat):
+    """configure と同じ解像度のフレームはスケーリングなしでエンコード (各コーデック・各ピクセルフォーマット)."""
+    width, height = 320, 240
 
-def test_scaling_same_resolution():
-    """configure と同じ解像度のフレームはスケーリングなしでエンコードされることを確認."""
     encoded_chunks = []
 
     def on_output(chunk):
@@ -2157,40 +2049,25 @@ def on_error(error):
 
     encoder = VideoEncoder(on_output, on_error)
 
-    # configure と encode で同じ解像度
     config: VideoEncoderConfig = {
-        "codec": "avc1.42E01E",
-        "width": 640,
-        "height": 480,
-        "bitrate": 1_000_000,
+        "codec": codec,
+        "width": width,
+        "height": height,
+        "bitrate": 500_000,
         "framerate": 30,
         "latency_mode": LatencyMode.REALTIME,
         "hardware_acceleration_engine": HardwareAccelerationEngine.APPLE_VIDEO_TOOLBOX,
     }
-
     encoder.configure(config)
 
     # 同じ解像度のフレーム
-    width, height = 640, 480
-    data_size = width * height * 3 // 2  # I420
-
-    data = np.zeros(data_size, dtype=np.uint8)
-    init: VideoFrameBufferInit = {
-        "format": VideoPixelFormat.I420,
-        "coded_width": width,
-        "coded_height": height,
-        "timestamp": 0,
-    }
-    frame = VideoFrame(data, init)
+    frame = _make_scaling_test_frame(width, height, 0, pixel_format)
     encoder.encode(frame, {"key_frame": True})
-
     encoder.flush()
+    frame.close()
 
     # エンコードが成功していることを確認
-    assert len(encoded_chunks) >= 1, "同一解像度エンコードに失敗"
+    assert len(encoded_chunks) >= 1
+    assert encoded_chunks[0].byte_length > 0
 
-    print("同一解像度テスト成功: スケーリングなしでエンコード")
-
-    # クリーンアップ
-    frame.close()
     encoder.close()

From b69f2f641440d49a109040e364d90233a4aebd2c Mon Sep 17 00:00:00 2001
From: voluntas <nakai@shiguredo.jp>
Date: Tue, 27 Jan 2026 09:24:48 +0900
Subject: [PATCH 12/12] =?UTF-8?q?test=5Fencode=5Fscaling=5Fmultiple=5Ffram?=
 =?UTF-8?q?es=20=E3=81=AB=20VP8=20=E3=82=92=E8=BF=BD=E5=8A=A0=E3=81=97=20C?=
 =?UTF-8?q?ODECS=20=E5=AE=9A=E6=95=B0=E3=82=92=E5=B0=8E=E5=85=A5=E3=81=99?=
 =?UTF-8?q?=E3=82=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 tests/test_encoder_scaling.py | 79 ++++++++++-------------------------
 1 file changed, 22 insertions(+), 57 deletions(-)

diff --git a/tests/test_encoder_scaling.py b/tests/test_encoder_scaling.py
index 1bb370c..aed825b 100644
--- a/tests/test_encoder_scaling.py
+++ b/tests/test_encoder_scaling.py
@@ -70,29 +70,28 @@ def _make_test_frame(
 # スケーリングテスト (全コーデック共通)
 # =============================================================================
 
-
-@pytest.mark.parametrize(
-    "codec",
-    [
-        pytest.param("av01.0.04M.08", id="AV1"),
-        pytest.param(
-            "vp8",
-            marks=pytest.mark.skipif(
-                platform.system() not in ("Darwin", "Linux"),
-                reason="VP8 は macOS / Linux のみサポート",
-            ),
-            id="VP8",
+CODECS = [
+    pytest.param("av01.0.04M.08", id="AV1"),
+    pytest.param(
+        "vp8",
+        marks=pytest.mark.skipif(
+            platform.system() not in ("Darwin", "Linux"),
+            reason="VP8 は macOS / Linux のみサポート",
         ),
-        pytest.param(
-            "vp09.00.10.08",
-            marks=pytest.mark.skipif(
-                platform.system() not in ("Darwin", "Linux"),
-                reason="VP9 は macOS / Linux のみサポート",
-            ),
-            id="VP9",
+        id="VP8",
+    ),
+    pytest.param(
+        "vp09.00.10.08",
+        marks=pytest.mark.skipif(
+            platform.system() not in ("Darwin", "Linux"),
+            reason="VP9 は macOS / Linux のみサポート",
         ),
-    ],
-)
+        id="VP9",
+    ),
+]
+
+
+@pytest.mark.parametrize("codec", CODECS)
 @pytest.mark.parametrize("pixel_format", VideoPixelFormat)
 def test_encode_with_scaling(codec: str, pixel_format: VideoPixelFormat):
     """エンコーダのスケーリング機能テスト (各コーデック・各ピクセルフォーマット)."""
@@ -161,28 +160,7 @@ def on_decode_error(error):
     decoder.close()
 
 
-@pytest.mark.parametrize(
-    "codec",
-    [
-        pytest.param("av01.0.04M.08", id="AV1"),
-        pytest.param(
-            "vp8",
-            marks=pytest.mark.skipif(
-                platform.system() not in ("Darwin", "Linux"),
-                reason="VP8 は macOS / Linux のみサポート",
-            ),
-            id="VP8",
-        ),
-        pytest.param(
-            "vp09.00.10.08",
-            marks=pytest.mark.skipif(
-                platform.system() not in ("Darwin", "Linux"),
-                reason="VP9 は macOS / Linux のみサポート",
-            ),
-            id="VP9",
-        ),
-    ],
-)
+@pytest.mark.parametrize("codec", CODECS)
 @pytest.mark.parametrize("pixel_format", VideoPixelFormat)
 def test_encode_scaling_same_resolution(codec: str, pixel_format: VideoPixelFormat):
     """configure と同じ解像度のフレームはスケーリングなしでエンコード (各コーデック・各ピクセルフォーマット)."""
@@ -221,20 +199,7 @@ def on_error(error):
     encoder.close()
 
 
-@pytest.mark.parametrize(
-    "codec",
-    [
-        pytest.param("av01.0.04M.08", id="AV1"),
-        pytest.param(
-            "vp09.00.10.08",
-            marks=pytest.mark.skipif(
-                platform.system() not in ("Darwin", "Linux"),
-                reason="VP9 は macOS / Linux のみサポート",
-            ),
-            id="VP9",
-        ),
-    ],
-)
+@pytest.mark.parametrize("codec", CODECS)
 @pytest.mark.parametrize("pixel_format", VideoPixelFormat)
 def test_encode_scaling_multiple_frames(codec: str, pixel_format: VideoPixelFormat):
     """複数フレームでのスケーリングテスト (各コーデック・各ピクセルフォーマット)."""