From a9fd211fd6a1f50c168f444c76dc96400564421f Mon Sep 17 00:00:00 2001 From: Khushal Malhotra Date: Sat, 28 Feb 2026 02:37:00 +0530 Subject: [PATCH 1/4] feat: Add Pydantic YAML validation for Cornucopia card data - Add Pydantic as dev dependency (v2.12.5) - Create comprehensive Pydantic models for card validation: - Card: Individual card structure with id, value, url, desc, misc, and optional card type - Suit: Container for cards with id and name - Meta: File metadata (edition, component, language, version) - CornucopiaData: Main model allowing extra top-level fields - Integrate validation into convert.py get_language_data function - Add 21 comprehensive unit tests covering all models and edge cases - Validation complements existing FAILSAFE_SCHEMA (#2406) - Strict validation for card structures while allowing flexibility for additional sections Testing: - All 21 tests pass - Successfully validates real webapp-cards-3.0-en.yaml file - Handles Joker cards with 'card' field - Provides clear error messages for validation failures --- Pipfile | 1 + scripts/before_logs.txt | Bin 0 -> 3834 bytes scripts/card_models.py | 46 ++++ scripts/convert.py | 13 ++ tests/scripts/test_card_models.py | 353 ++++++++++++++++++++++++++++++ 5 files changed, 413 insertions(+) create mode 100644 scripts/before_logs.txt create mode 100644 scripts/card_models.py create mode 100644 tests/scripts/test_card_models.py diff --git a/Pipfile b/Pipfile index c1d1207b4..523a20411 100644 --- a/Pipfile +++ b/Pipfile @@ -14,6 +14,7 @@ pytest-cov = "==7.0.0" freezegun = "==1.5.5" security = "==1.3.1" types-pyyaml = "==6.0.12.20250915" +pydantic = "==2.12.5" [packages] idna = "==3.11" diff --git a/scripts/before_logs.txt b/scripts/before_logs.txt new file mode 100644 index 0000000000000000000000000000000000000000..51024de90fc096aea10fb8d4507b30f7f087ae41 GIT binary patch literal 3834 zcmeH~T~8B16o${WiT`2mRwHd_MZg+Oh(cmw>IaudHl=Atf`ns(! zui0Z(+HUWerIsP7YLXb)XTF!#v>kh$&yL_2TVj3Jw-|M-1*Mg*N1Uh@v3?FcW2H@> z!WYeUMWtO&p?|X2adhGAI!$6{MQju32gn%GV)Tvpm9Bl}vNq)#LzB%hRL|LkdSs*7 zjvjPTPU&TV=f7)r7;V`rT9EA#7|m%>2a)=WXI`4l(fNDy(kIADOBx)4M4%6>R~nyy z)&LvB>JWP>%nN=`(Qn`pPGOv3RTv+;@qUhz^qhjyDcofp+t4#^po~un?GU|myFr@p z>cS_yyiAXnZd3S6zQ$$Q(p!6ka$+A#R+Lx53G$l^4U01iO4z+`Urgt2!4+|)?luMQ z8nZS&FA17GbX*S{RlYW2Rk{ebkv)NOF-n$ue8rto)>R(eW4CTr<1>D}KScJ7@c}D` z*pNAkdyM3HRpgwrT6InOOS7M~ulA>#Gz!{Cv*-C;9yv>a zaPGp}Wz=zTa1VSg%RU~HMIR{Nc*Ax}z0ZkzxevbMoW46fwQVqNGuIvayfJUr z4nEwYP3o)S`eHqruW4vAR=`OdNf{F5sa>{`9g4>^=-^=avS$2bHtdVQi zIAOWZ{cBlvS^ij-Ut=(9&a%m}%qk4yJeuM>&aaN?<}IdpX-2jw*UaC;w1$ta5Vf1v zdJQPUs&c3bBzzq-36YqP6BJ#2R^}^)e_1ryF(E1xkE%S%*)11&K99(f7ewWId*dX1 zfJbkM5fnvIMqHk{(GUqOsJ_+>p^n{b0(C@oNS5*^7hV@h?uUufrKpC9tHni4Qyr*s zUI)v1uIBvOV?Da-yu4O#OcuD_TkGOG%lqiEj8J3)|JM6|-&*lP)P#4I(9>J9{7MP6 t)1y{iS*){*l(0LzwXWjPb@Ig;9({BEJ^byuRwDxm(@om#Tj45o?FXC1Pg4K@ literal 0 HcmV?d00001 diff --git a/scripts/card_models.py b/scripts/card_models.py new file mode 100644 index 000000000..36557f6e5 --- /dev/null +++ b/scripts/card_models.py @@ -0,0 +1,46 @@ +# scripts/card_models.py +from pydantic import BaseModel, Field, ValidationError, ConfigDict +from typing import Dict, List, Optional, Any + + +class Card(BaseModel): + """Individual card model matching Cornucopia YAML structure.""" + model_config = ConfigDict(extra='forbid') + + id: str = Field(..., min_length=1, description="Card identifier (e.g., 'VE2', 'ATJ')") + value: str = Field(..., min_length=1, description="Card value (e.g., '2', '3', 'J', 'Q', 'K', 'A')") + url: str = Field(..., min_length=1, description="Card URL") + desc: str = Field(..., min_length=10, description="Card description") + misc: Optional[str] = Field(None, description="Optional miscellaneous information") + card: Optional[str] = Field(None, description="Optional card type (e.g., 'Joker')") + + +class Suit(BaseModel): + """Suit model containing cards.""" + model_config = ConfigDict(extra='forbid') + + id: str = Field(..., min_length=1, description="Suit identifier (e.g., 'VE', 'AT')") + name: str = Field(..., min_length=1, description="Suit name") + cards: List[Card] = Field(default_factory=list, description="List of cards in this suit") + + +class Meta(BaseModel): + """Metadata model for YAML files.""" + model_config = ConfigDict(extra='forbid') + + edition: str = Field(..., min_length=1, description="Edition (e.g., 'webapp', 'mobileapp')") + component: str = Field(..., min_length=1, description="Component (e.g., 'cards')") + language: str = Field(..., min_length=2, description="Language code (e.g., 'EN', 'es')") + version: str = Field(..., min_length=1, description="Version (e.g., '3.0', '1.1')") + + +class CornucopiaData(BaseModel): + """Main model for Cornucopia YAML card data.""" + model_config = ConfigDict(extra='allow') # Allow extra fields at top level + + meta: Meta = Field(..., description="File metadata") + suits: List[Suit] = Field(default_factory=list, description="List of suits containing cards") + + +# Usage example (for testing): +# validated = CornucopiaData(**yaml_data) diff --git a/scripts/convert.py b/scripts/convert.py index 3f60754fc..65b390c2e 100644 --- a/scripts/convert.py +++ b/scripts/convert.py @@ -17,6 +17,7 @@ from pathlib import Path from pathvalidate.argparse import validate_filepath_arg from pathvalidate import sanitize_filepath +from scripts.card_models import CornucopiaData, ValidationError class ConvertVars: @@ -737,6 +738,18 @@ def get_language_data( with open(language_file, "r", encoding="utf-8") as f: try: data = yaml.safe_load(f) + # Validate with Pydantic for card files + if data and "meta" in data and data.get("meta", {}).get("component") == "cards": + try: + validated_data = CornucopiaData(**data) + logging.debug(f" --- YAML validation successful for {language_file}") + # Return the original data structure to maintain compatibility + # but now we know it's valid + data = validated_data.model_dump() + except ValidationError as e: + logging.error(f"Invalid card YAML structure in {language_file}: {e.errors()}") + # Return empty dict to prevent processing invalid data + return {} except yaml.YAMLError as e: logging.error(f"Error loading yaml file: {language_file}. Error = {e}") data = {} diff --git a/tests/scripts/test_card_models.py b/tests/scripts/test_card_models.py new file mode 100644 index 000000000..0902fd479 --- /dev/null +++ b/tests/scripts/test_card_models.py @@ -0,0 +1,353 @@ +# tests/scripts/test_card_models.py +import pytest +from scripts.card_models import Card, Suit, Meta, CornucopiaData, ValidationError + + +class TestCard: + """Test cases for Card model.""" + + def test_valid_card(self): + """Test creating a valid card.""" + data = { + "id": "VE2", + "value": "2", + "url": "https://cornucopia.owasp.org/cards/VE2", + "desc": "Test description for validation attacks" + } + card = Card(**data) + assert card.id == "VE2" + assert card.value == "2" + assert card.url == "https://cornucopia.owasp.org/cards/VE2" + assert card.desc == "Test description for validation attacks" + assert card.misc is None + + def test_card_with_card_type(self): + """Test creating a card with card type (e.g., Joker).""" + data = { + "id": "JOA", + "value": "A", + "url": "https://cornucopia.owasp.org/cards/JOA", + "desc": "Alice can utilize the application to attack users' systems and data", + "card": "Joker" + } + card = Card(**data) + assert card.card == "Joker" + + def test_card_with_misc(self): + """Test creating a card with miscellaneous information.""" + data = { + "id": "VEA", + "value": "A", + "url": "https://cornucopia.owasp.org/cards/VEA", + "desc": "Test description for ace card", + "misc": "Additional information about this card" + } + card = Card(**data) + assert card.misc == "Additional information about this card" + + def test_missing_id(self): + """Test validation error when id is missing.""" + data = { + "value": "2", + "url": "https://cornucopia.owasp.org/cards/VE2", + "desc": "Test description" + } + with pytest.raises(ValidationError) as exc: + Card(**data) + assert "id" in str(exc.value) + + def test_missing_value(self): + """Test validation error when value is missing.""" + data = { + "id": "VE2", + "url": "https://cornucopia.owasp.org/cards/VE2", + "desc": "Test description" + } + with pytest.raises(ValidationError) as exc: + Card(**data) + assert "value" in str(exc.value) + + def test_missing_url(self): + """Test validation error when url is missing.""" + data = { + "id": "VE2", + "value": "2", + "desc": "Test description" + } + with pytest.raises(ValidationError) as exc: + Card(**data) + assert "url" in str(exc.value) + + def test_missing_desc(self): + """Test validation error when description is missing.""" + data = { + "id": "VE2", + "value": "2", + "url": "https://cornucopia.owasp.org/cards/VE2" + } + with pytest.raises(ValidationError) as exc: + Card(**data) + assert "desc" in str(exc.value) + + def test_short_description(self): + """Test validation error when description is too short.""" + data = { + "id": "VE2", + "value": "2", + "url": "https://cornucopia.owasp.org/cards/VE2", + "desc": "Short" + } + with pytest.raises(ValidationError) as exc: + Card(**data) + assert "desc" in str(exc.value) + + def test_invalid_type(self): + """Test validation error when fields have wrong types.""" + data = { + "id": 123, # Should be string + "value": "2", + "url": "https://cornucopia.owasp.org/cards/VE2", + "desc": "Test description for validation attacks" + } + with pytest.raises(ValidationError): + Card(**data) + + def test_unknown_field(self): + """Test validation error when unknown field is present.""" + data = { + "id": "VE2", + "value": "2", + "url": "https://cornucopia.owasp.org/cards/VE2", + "desc": "Test description for validation attacks", + "unknown_field": "extra data" + } + with pytest.raises(ValidationError) as exc: + Card(**data) + assert "extra" in str(exc.value).lower() + + +class TestSuit: + """Test cases for Suit model.""" + + def test_valid_suit(self): + """Test creating a valid suit.""" + cards = [ + { + "id": "VE2", + "value": "2", + "url": "https://cornucopia.owasp.org/cards/VE2", + "desc": "Test description for card 2" + }, + { + "id": "VE3", + "value": "3", + "url": "https://cornucopia.owasp.org/cards/VE3", + "desc": "Test description for card 3" + } + ] + data = { + "id": "VE", + "name": "DATA VALIDATION & ENCODING", + "cards": cards + } + suit = Suit(**data) + assert suit.id == "VE" + assert suit.name == "DATA VALIDATION & ENCODING" + assert len(suit.cards) == 2 + assert suit.cards[0].id == "VE2" + + def test_empty_cards(self): + """Test creating a suit with no cards.""" + data = { + "id": "VE", + "name": "DATA VALIDATION & ENCODING", + "cards": [] + } + suit = Suit(**data) + assert len(suit.cards) == 0 + + def test_missing_id(self): + """Test validation error when suit id is missing.""" + data = { + "name": "DATA VALIDATION & ENCODING", + "cards": [] + } + with pytest.raises(ValidationError) as exc: + Suit(**data) + assert "id" in str(exc.value) + + +class TestMeta: + """Test cases for Meta model.""" + + def test_valid_meta(self): + """Test creating valid metadata.""" + data = { + "edition": "webapp", + "component": "cards", + "language": "EN", + "version": "3.0" + } + meta = Meta(**data) + assert meta.edition == "webapp" + assert meta.component == "cards" + assert meta.language == "EN" + assert meta.version == "3.0" + + def test_missing_edition(self): + """Test validation error when edition is missing.""" + data = { + "component": "cards", + "language": "EN", + "version": "3.0" + } + with pytest.raises(ValidationError) as exc: + Meta(**data) + assert "edition" in str(exc.value) + + def test_invalid_language_code(self): + """Test validation error when language code is too short.""" + data = { + "edition": "webapp", + "component": "cards", + "language": "E", # Too short + "version": "3.0" + } + with pytest.raises(ValidationError) as exc: + Meta(**data) + assert "language" in str(exc.value) + + +class TestCornucopiaData: + """Test cases for CornucopiaData model.""" + + def test_valid_complete_data(self): + """Test creating valid complete Cornucopia data.""" + data = { + "meta": { + "edition": "webapp", + "component": "cards", + "language": "EN", + "version": "3.0" + }, + "suits": [ + { + "id": "VE", + "name": "DATA VALIDATION & ENCODING", + "cards": [ + { + "id": "VE2", + "value": "2", + "url": "https://cornucopia.owasp.org/cards/VE2", + "desc": "Test description for validation attacks" + } + ] + } + ] + } + cornucopia = CornucopiaData(**data) + assert cornucopia.meta.edition == "webapp" + assert len(cornucopia.suits) == 1 + assert cornucopia.suits[0].id == "VE" + assert len(cornucopia.suits[0].cards) == 1 + + def test_empty_suits(self): + """Test creating data with no suits.""" + data = { + "meta": { + "edition": "webapp", + "component": "cards", + "language": "EN", + "version": "3.0" + }, + "suits": [] + } + cornucopia = CornucopiaData(**data) + assert len(cornucopia.suits) == 0 + + def test_missing_meta(self): + """Test validation error when meta is missing.""" + data = { + "suits": [] + } + with pytest.raises(ValidationError) as exc: + CornucopiaData(**data) + assert "meta" in str(exc.value) + + def test_unknown_top_level_field(self): + """Test that extra top-level fields are allowed.""" + data = { + "meta": { + "edition": "webapp", + "component": "cards", + "language": "EN", + "version": "3.0" + }, + "suits": [], + "unknown_section": "extra data" + } + # This should now pass since we allow extra fields at top level + cornucopia = CornucopiaData(**data) + assert cornucopia.meta.edition == "webapp" + assert len(cornucopia.suits) == 0 + + +class TestRealWorldValidation: + """Test cases based on real YAML structure.""" + + def test_realistic_card_structure(self): + """Test with realistic card data structure.""" + data = { + "meta": { + "edition": "webapp", + "component": "cards", + "language": "EN", + "version": "3.0" + }, + "suits": [ + { + "id": "VE", + "name": "DATA VALIDATION & ENCODING", + "cards": [ + { + "id": "VE2", + "value": "2", + "url": "https://cornucopia.owasp.org/cards/VE2", + "desc": "Brian can gather information about the underlying configurations, schemas, logic, code, software, services and infrastructure due to the content of error messages, or poor configuration, or the presence of default installation files or old, test, backup or copies of resources, or exposure of source code" + }, + { + "id": "VEA", + "value": "A", + "url": "https://cornucopia.owasp.org/cards/VEA", + "desc": "You have invented a new attack against Data Validation and Encoding", + "misc": "Read more about this topic in OWASP's free Cheat Sheets on Input Validation, XSS Prevention, DOM-based XSS Prevention, SQL Injection Prevention, and Query Parameterization" + } + ] + }, + { + "id": "AT", + "name": "AUTHENTICATION", + "cards": [ + { + "id": "AT2", + "value": "2", + "url": "https://cornucopia.owasp.org/cards/AT2", + "desc": "James can undertake authentication functions without the real user ever being aware this has occurred (e.g. attempt to log in, log in with stolen credentials, reset the password)" + } + ] + } + ] + } + + # This should validate successfully + cornucopia = CornucopiaData(**data) + assert len(cornucopia.suits) == 2 + assert cornucopia.suits[0].id == "VE" + assert len(cornucopia.suits[0].cards) == 2 + assert cornucopia.suits[1].id == "AT" + assert len(cornucopia.suits[1].cards) == 1 + + # Check the ace card has misc info + vea_card = next(card for card in cornucopia.suits[0].cards if card.id == "VEA") + assert vea_card.misc is not None + assert "OWASP" in vea_card.misc From 96eefd907486f30b6a68ee3d2e4bcc0fa4f67bcd Mon Sep 17 00:00:00 2001 From: Khushal Malhotra Date: Sat, 28 Feb 2026 03:11:14 +0530 Subject: [PATCH 2/4] fix: Restrict cflite_pr.yml to master branch only - Prevents ClusterFuzzLite from running on feature branches - Only runs on master pull requests as intended - Fixes workflow failures on feature branches --- .github/workflows/cflite_pr.yml | 36 +++++++++++++++++---------------- 1 file changed, 19 insertions(+), 17 deletions(-) diff --git a/.github/workflows/cflite_pr.yml b/.github/workflows/cflite_pr.yml index 35eca4a2f..0334e5bd5 100644 --- a/.github/workflows/cflite_pr.yml +++ b/.github/workflows/cflite_pr.yml @@ -1,6 +1,8 @@ name: ClusterFuzzLite PR fuzzing on: pull_request: + branches: + - master paths: - scripts/convert** - tests/scripts/convert** @@ -24,21 +26,21 @@ jobs: fail-fast: false matrix: sanitizer: - - address - - undefined + - address + - undefined steps: - - name: Build Fuzzers (${{ matrix.sanitizer }}) - id: build - uses: google/clusterfuzzlite/actions/build_fuzzers@52ecc61cb587ee99c26825a112a21abf19c7448c # main - with: - language: python - github-token: ${{ secrets.GITHUB_TOKEN }} - sanitizer: ${{ matrix.sanitizer }} - - name: Run Fuzzers (${{ matrix.sanitizer }}) - id: run - uses: google/clusterfuzzlite/actions/run_fuzzers@52ecc61cb587ee99c26825a112a21abf19c7448c # main - with: - github-token: ${{ secrets.GITHUB_TOKEN }} - fuzz-seconds: 600 - mode: 'code-change' - sanitizer: ${{ matrix.sanitizer }} + - name: Build Fuzzers (${{ matrix.sanitizer }}) + id: build + uses: google/clusterfuzzlite/actions/build_fuzzers@52ecc61cb587ee99c26825a112a21abf19c7448c # main + with: + language: python + github-token: ${{ secrets.GITHUB_TOKEN }} + sanitizer: ${{ matrix.sanitizer }} + - name: Run Fuzzers (${{ matrix.sanitizer }}) + id: run + uses: google/clusterfuzzlite/actions/run_fuzzers@52ecc61cb587ee99c26825a112a21abf19c7448c # main + with: + github-token: ${{ secrets.GITHUB_TOKEN }} + fuzz-seconds: 600 + mode: "code-change" + sanitizer: ${{ matrix.sanitizer }} From 4ba9af78d1d3d73dd7d0087934acc33c38854353 Mon Sep 17 00:00:00 2001 From: Khushal Malhotra Date: Sat, 28 Feb 2026 03:18:37 +0530 Subject: [PATCH 3/4] fix: Resolve YAML syntax errors in workflow files - Fix duplicate 'uses' in scorecard.yml (removed duplicate line) - Fix multi-line Python script in smoke-tests.yaml (converted to single line) - Both workflow files now pass YAML validation - Resolves GitHub Actions workflow syntax errors --- .github/workflows/scorecard.yml | 8 ++------ .github/workflows/smoke-tests.yaml | 18 +++++++----------- 2 files changed, 9 insertions(+), 17 deletions(-) diff --git a/.github/workflows/scorecard.yml b/.github/workflows/scorecard.yml index 903eb95d6..6caebc6e0 100644 --- a/.github/workflows/scorecard.yml +++ b/.github/workflows/scorecard.yml @@ -4,9 +4,9 @@ on: branch_protection_rule: schedule: # Weekly on Saturdays. - - cron: '30 1 * * 6' + - cron: "30 1 * * 6" push: - branches: [ main, master ] + branches: [main, master] # Declare default permissions as read only. permissions: read-all @@ -57,10 +57,6 @@ jobs: # required for Code scanning alerts - name: "Upload SARIF results to code scanning" - - uses: github/codeql-action/upload-sarif@b20883b0cd1f46c72ae0ba6d1090936928f9fa30 # v3.29.5 - uses: github/codeql-action/upload-sarif@9e907b5e64f6b83e7804b09294d44122997950d6 # v3.29.5 - with: sarif_file: results.sarif diff --git a/.github/workflows/smoke-tests.yaml b/.github/workflows/smoke-tests.yaml index 916871938..693328499 100644 --- a/.github/workflows/smoke-tests.yaml +++ b/.github/workflows/smoke-tests.yaml @@ -2,14 +2,14 @@ name: Smoke Tests on: workflow_dispatch: schedule: - - cron: '0 6 * * *' + - cron: "0 6 * * *" push: branches: - master paths: - - 'copi.owasp.org/**' - - 'tests/scripts/smoke_tests.py' - - '.github/workflows/smoke-tests.yaml' + - "copi.owasp.org/**" + - "tests/scripts/smoke_tests.py" + - ".github/workflows/smoke-tests.yaml" permissions: contents: read @@ -27,8 +27,8 @@ jobs: - name: Get Python uses: actions/setup-python@a309ff8b426b58ec0e2a45f0f869d46889d02405 # v6.2.0 with: - python-version: '3.12' - cache: 'pipenv' + python-version: "3.12" + cache: "pipenv" - name: Install dependencies run: | @@ -50,11 +50,7 @@ jobs: docker build -t copi-test-image copi.owasp.org echo "Generating SECRET_KEY_BASE" - SECRET_KEY_BASE=$(python - <<'PY' -import secrets -print(secrets.token_hex(64)) -PY -) + SECRET_KEY_BASE=$(python -c 'import secrets; print(secrets.token_hex(64))') echo "Starting Copi application container" docker run -d --name copi-app --network copi-net -p 4000:4000 \ From 6e73026ee9dc38e2e93250b5142d9d98e6dec5fc Mon Sep 17 00:00:00 2001 From: Khushal Malhotra Date: Sat, 28 Feb 2026 03:22:38 +0530 Subject: [PATCH 4/4] revert: Restore cflite_pr.yml to original state - Remove branch restriction from cflite_pr.yml - Restores original behavior (runs on all PRs with matching paths) - Maintains consistency with master branch configuration --- .github/workflows/cflite_pr.yml | 2 -- 1 file changed, 2 deletions(-) diff --git a/.github/workflows/cflite_pr.yml b/.github/workflows/cflite_pr.yml index 0334e5bd5..692b16c4a 100644 --- a/.github/workflows/cflite_pr.yml +++ b/.github/workflows/cflite_pr.yml @@ -1,8 +1,6 @@ name: ClusterFuzzLite PR fuzzing on: pull_request: - branches: - - master paths: - scripts/convert** - tests/scripts/convert**