diff --git a/everyrow-mcp/deploy/docker-compose.local.yaml b/everyrow-mcp/deploy/docker-compose.local.yaml index c963e846..3d288716 100644 --- a/everyrow-mcp/deploy/docker-compose.local.yaml +++ b/everyrow-mcp/deploy/docker-compose.local.yaml @@ -1,4 +1,4 @@ -# Local development overrides — NOT for production use. +# Local development overrides. # Usage: docker compose -f docker-compose.yaml -f docker-compose.local.yaml up services: redis: @@ -9,4 +9,5 @@ services: environment: MCP_SERVER_URL: "${MCP_SERVER_URL:-http://localhost:8000}" TRUST_PROXY_HEADERS: "${TRUST_PROXY_HEADERS:-false}" - EXTRA_ALLOWED_HOSTS: "host.docker.internal:*" # local dev only — widens DNS rebinding allowlist + ENABLE_SHEETS_TOOLS: "${ENABLE_SHEETS_TOOLS:-false}" + EXTRA_ALLOWED_HOSTS: "${EXTRA_ALLOWED_HOSTS:-}" diff --git a/everyrow-mcp/manifest.json b/everyrow-mcp/manifest.json index 2949ddfd..76d990bb 100644 --- a/everyrow-mcp/manifest.json +++ b/everyrow-mcp/manifest.json @@ -92,6 +92,26 @@ { "name": "everyrow_use_list", "description": "Import a reference list into your session and save it as a CSV file." + }, + { + "name": "sheets_list", + "description": "List the user's Google Sheets, optionally filtered by name." + }, + { + "name": "sheets_read", + "description": "Read data from a Google Sheet and return it as JSON records." + }, + { + "name": "sheets_write", + "description": "Write data to a Google Sheet." + }, + { + "name": "sheets_create", + "description": "Create a new Google Sheet, optionally populated with data." + }, + { + "name": "sheets_info", + "description": "Get metadata about a Google Sheet: title, sheet names, and dimensions." 
} ], "user_config": { diff --git a/everyrow-mcp/src/everyrow_mcp/auth.py b/everyrow-mcp/src/everyrow_mcp/auth.py index f2a46ff4..99228e0c 100644 --- a/everyrow-mcp/src/everyrow_mcp/auth.py +++ b/everyrow-mcp/src/everyrow_mcp/auth.py @@ -128,12 +128,15 @@ class EveryRowAuthorizationCode(AuthorizationCode): supabase_access_token: str supabase_refresh_token: str + google_access_token: str = "" + google_refresh_token: str = "" class EveryRowRefreshToken(RefreshToken): """Extends RefreshToken with the Supabase refresh token.""" supabase_refresh_token: str + google_refresh_token: str = "" class SupabaseTokenResponse(BaseModel): @@ -141,6 +144,8 @@ class SupabaseTokenResponse(BaseModel): access_token: str refresh_token: str + provider_token: str = "" + provider_refresh_token: str = "" class PendingAuth(BaseModel): @@ -239,6 +244,10 @@ def _supabase_redirect_url(supabase_verifier: str) -> str: 'flow_type': 'pkce', 'code_challenge': supabase_challenge, 'code_challenge_method': 's256', + 'scopes': ( + 'https://www.googleapis.com/auth/spreadsheets ' + 'https://www.googleapis.com/auth/drive.metadata.readonly' + ), } ) }" @@ -391,6 +400,8 @@ async def _create_authorisation_code( resource=pending.params.resource, supabase_access_token=supa_tokens.access_token, supabase_refresh_token=supa_tokens.refresh_token, + google_access_token=supa_tokens.provider_token, + google_refresh_token=supa_tokens.provider_refresh_token, ) await self._redis.setex( name=build_key("authcode", code), @@ -449,6 +460,8 @@ async def _issue_token_response( client_id: str, scopes: list[str], supabase_refresh_token: str, + google_access_token: str = "", + google_refresh_token: str = "", ) -> OAuthToken: # SECURITY: Extract exp from the Supabase JWT without signature # verification. 
This is safe ONLY because the token was just received @@ -462,12 +475,31 @@ async def _issue_token_response( ) expires_in = max(0, jwt_claims.get("exp", 0) - int(time.time())) + # Store Google tokens in Redis for Sheets tools + if google_access_token: + from everyrow_mcp.sheets_client import store_google_token # noqa: PLC0415 + + try: + await store_google_token( + jwt_claims.get("sub", "unknown"), + google_access_token, + google_refresh_token or None, + expires_in=expires_in, + ) + except Exception: + logger.error( + "Google token storage failed for user=%s — Sheets tools will be unavailable", + jwt_claims.get("sub", "unknown"), + exc_info=True, + ) + rt_str = secrets.token_urlsafe(32) rt = EveryRowRefreshToken( token=rt_str, client_id=client_id, scopes=scopes, supabase_refresh_token=supabase_refresh_token, + google_refresh_token=google_refresh_token, ) await self._redis.setex( name=build_key("refresh", rt_str), @@ -494,6 +526,8 @@ async def exchange_authorization_code( client_id=client.client_id, scopes=authorization_code.scopes, supabase_refresh_token=authorization_code.supabase_refresh_token, + google_access_token=authorization_code.google_access_token, + google_refresh_token=authorization_code.google_refresh_token, ) async def load_access_token(self, token: str) -> AccessToken | None: @@ -556,6 +590,9 @@ async def exchange_refresh_token( value=encrypt_value(refresh_token.model_dump_json()), ) raise + google_refresh = ( + supa_tokens.provider_refresh_token or refresh_token.google_refresh_token + ) assert client.client_id is not None logger.info("Token refresh successful user=%s", client.client_id) return await self._issue_token_response( @@ -563,6 +600,8 @@ async def exchange_refresh_token( client_id=client.client_id, scopes=final_scopes, supabase_refresh_token=supa_tokens.refresh_token, + google_access_token=supa_tokens.provider_token, + google_refresh_token=google_refresh, ) async def revoke_token(self, token: AccessToken | EveryRowRefreshToken) -> None: diff 
--git a/everyrow-mcp/src/everyrow_mcp/config.py b/everyrow-mcp/src/everyrow_mcp/config.py index 732e554d..2cbedb27 100644 --- a/everyrow-mcp/src/everyrow_mcp/config.py +++ b/everyrow-mcp/src/everyrow_mcp/config.py @@ -125,6 +125,16 @@ class Settings(BaseSettings): description="Upload rate limit sliding window in seconds (1 hour)", ) + enable_sheets_tools: bool = Field( + default=False, + description="Enable Google Sheets tools (requires HTTP mode with Google OAuth)", + ) + sheets_rate_limit: PositiveInt = Field( + default=60, description="Max sheets ops per user per rate window" + ) + sheets_rate_window: PositiveInt = Field( + default=60, description="Sheets rate limit window in seconds" + ) everyrow_api_key: str | None = Field(default=None, repr=False) @property diff --git a/everyrow-mcp/src/everyrow_mcp/models.py b/everyrow-mcp/src/everyrow_mcp/models.py index 2a5ebc98..9caa4c66 100644 --- a/everyrow-mcp/src/everyrow_mcp/models.py +++ b/everyrow-mcp/src/everyrow_mcp/models.py @@ -701,6 +701,12 @@ def validate_task_id(cls, v: str) -> str: description="Full absolute path to the output CSV file (must end in .csv). " "Optional — results are returned as a paginated preview by default.", ) + output_spreadsheet_title: str | None = Field( + default=None, + description="Create a new Google Sheet with this title and write the full " + "results there. Returns the spreadsheet URL. Fails if a sheet with " + "this exact title already exists — pick a unique name.", + ) offset: int = Field( default=0, description="Row offset for pagination. 
Default 0 returns the first page.", diff --git a/everyrow-mcp/src/everyrow_mcp/server.py b/everyrow-mcp/src/everyrow_mcp/server.py index 2a659583..9ae0d10a 100644 --- a/everyrow-mcp/src/everyrow_mcp/server.py +++ b/everyrow-mcp/src/everyrow_mcp/server.py @@ -80,6 +80,10 @@ def main(): settings.transport = transport.value mcp._mcp_server.instructions = get_instructions(is_http=input_args.http) + # Register sheets tools after transport is set (they require HTTP mode) + if settings.enable_sheets_tools and settings.is_http: + import everyrow_mcp.sheets_tools # noqa: F401, PLC0415 + # tools.py registers everyrow_results_stdio by default. # Override with the HTTP variant when running in HTTP mode. # ToolManager.add_tool() is a no-op for existing names, so remove first. @@ -92,6 +96,13 @@ def main(): meta=_RESULTS_META, )(everyrow_results_http) + # Strip output_spreadsheet_title from results schema when sheets disabled + if not settings.enable_sheets_tools: + tool = mcp._tool_manager.get_tool("everyrow_results") + if tool: + http_def = tool.parameters.get("$defs", {}).get("HttpResultsInput", {}) + http_def.get("properties", {}).pop("output_spreadsheet_title", None) + if input_args.http: # ── HTTP mode logging ────────────────────────────────────── # INFO level so operational events show up in Cloud Logging. diff --git a/everyrow-mcp/src/everyrow_mcp/sheets_client.py b/everyrow-mcp/src/everyrow_mcp/sheets_client.py new file mode 100644 index 00000000..a26ee968 --- /dev/null +++ b/everyrow-mcp/src/everyrow_mcp/sheets_client.py @@ -0,0 +1,310 @@ +"""Async Google Sheets API client using httpx. + +Handles token resolution for HTTP mode (Redis-stored OAuth tokens obtained +during the Supabase/Google OAuth flow). Sheets tools are not available in +stdio mode. 
+""" + +from __future__ import annotations + +import json +import logging +import re +import time +from typing import Any + +import httpx + +from everyrow_mcp.redis_store import ( + build_key, + decrypt_value, + encrypt_value, + get_redis_client, +) + +logger = logging.getLogger(__name__) + +SHEETS_API_BASE = "https://sheets.googleapis.com/v4/spreadsheets" +DRIVE_API_BASE = "https://www.googleapis.com/drive/v3" + +# Google token TTL and refresh buffer +GOOGLE_TOKEN_TTL_DEFAULT = 3600 # 1 hour +GOOGLE_TOKEN_REFRESH_BUFFER = 300 # refresh 5 min before expiry + + +# ── Token resolution ────────────────────────────────────────────────── + + +async def get_google_token(user_id: str | None = None) -> str: + """Resolve a valid Google access token from Redis. + + The token is stored during the OAuth callback when the user logs in + via Google through Supabase. Auto-refreshes if near expiry. + + Only available in HTTP mode — sheets tools are removed in stdio mode. + """ + if user_id is None: + from mcp.server.auth.middleware.auth_context import ( # noqa: PLC0415 + get_access_token, + ) + + access_token = get_access_token() + user_id = access_token.client_id if access_token else None + if not user_id: + raise RuntimeError( + "No authenticated user. The user must log in with Google " + "(with Sheets scopes) to use Google Sheets tools." 
+ ) + + redis = get_redis_client() + + token_key = build_key("google_token", user_id) + raw = await redis.get(token_key) + if raw: + data = json.loads(decrypt_value(raw)) + expires_at = data.get("expires_at", 0) + if time.time() < expires_at - GOOGLE_TOKEN_REFRESH_BUFFER: + return data["access_token"] + + # Token near expiry — try to refresh + refresh_token = data.get("refresh_token") + if refresh_token: + try: + return await _refresh_google_token_http(refresh_token, user_id) + except Exception as e: + logger.warning("Failed to refresh Google token: %s", type(e).__name__) + if time.time() < expires_at: + return data["access_token"] + + raise RuntimeError( + "No Google token available. The user must log in with Google " + "(with Sheets scopes) to use Google Sheets tools." + ) + + +async def _refresh_google_token_http(refresh_token: str, user_id: str) -> str: + """Refresh a Google access token using the Supabase-stored refresh token.""" + from everyrow_mcp.config import settings # noqa: PLC0415 + + async with httpx.AsyncClient(timeout=10.0) as client: + # Refresh through Supabase which proxies to Google + resp = await client.post( + f"{settings.supabase_url}/auth/v1/token?grant_type=refresh_token", + json={"refresh_token": refresh_token}, + headers={ + "apikey": settings.supabase_anon_key, + "Content-Type": "application/json", + }, + ) + resp.raise_for_status() + data = resp.json() + + provider_token = data.get("provider_token", "") + provider_refresh_token = data.get("provider_refresh_token", refresh_token) + expires_in = data.get("expires_in") + + if not provider_token: + raise RuntimeError("Supabase refresh did not return a Google provider_token") + + await store_google_token( + user_id, provider_token, provider_refresh_token, expires_in=expires_in + ) + return provider_token + + +async def store_google_token( + user_id: str, + access_token: str, + refresh_token: str | None = None, + *, + expires_in: int | None = None, +) -> None: + """Store Google access token in 
Redis with TTL.""" + try: + redis = get_redis_client() + except Exception: + logger.error("Failed to obtain Redis client for Google token storage") + raise + ttl = expires_in if expires_in and expires_in > 0 else GOOGLE_TOKEN_TTL_DEFAULT + try: + data: dict[str, Any] = { + "access_token": access_token, + "expires_at": time.time() + ttl, + } + if refresh_token: + data["refresh_token"] = refresh_token + await redis.setex( + build_key("google_token", user_id), + ttl, + encrypt_value(json.dumps(data)), + ) + except Exception: + logger.error("Failed to store Google token in Redis for %s", user_id) + raise + + +# ── Sheets API client ───────────────────────────────────────────────── + + +class GoogleSheetsClient: + """Async Google Sheets API v4 client.""" + + def __init__(self, access_token: str) -> None: + self._token = access_token + self._client = httpx.AsyncClient( + timeout=30.0, + headers={ + "Authorization": f"Bearer {access_token}", + "Content-Type": "application/json", + }, + ) + + async def close(self) -> None: + await self._client.aclose() + + async def __aenter__(self) -> GoogleSheetsClient: + return self + + async def __aexit__(self, *args: Any) -> None: + await self.close() + + async def read_range( + self, spreadsheet_id: str, cell_range: str = "Sheet1" + ) -> list[list[str]]: + """Read values from a spreadsheet range. + + Returns a 2D list of strings (rows x columns). 
+ """ + resp = await self._client.get( + f"{SHEETS_API_BASE}/{spreadsheet_id}/values/{cell_range}", + params={"valueRenderOption": "FORMATTED_VALUE"}, + ) + resp.raise_for_status() + data = resp.json() + return data.get("values", []) + + async def write_range( + self, + spreadsheet_id: str, + cell_range: str, + values: list[list[str]], + ) -> dict[str, Any]: + """Write values to a spreadsheet range (overwrite).""" + resp = await self._client.put( + f"{SHEETS_API_BASE}/{spreadsheet_id}/values/{cell_range}", + params={"valueInputOption": "USER_ENTERED"}, + json={"values": values}, + ) + resp.raise_for_status() + return resp.json() + + async def append_range( + self, + spreadsheet_id: str, + cell_range: str, + values: list[list[str]], + ) -> dict[str, Any]: + """Append values after existing data in a range.""" + resp = await self._client.post( + f"{SHEETS_API_BASE}/{spreadsheet_id}/values/{cell_range}:append", + params={ + "valueInputOption": "USER_ENTERED", + "insertDataOption": "INSERT_ROWS", + }, + json={"values": values}, + ) + resp.raise_for_status() + return resp.json() + + async def create_spreadsheet(self, title: str) -> dict[str, Any]: + """Create a new spreadsheet. Returns metadata with spreadsheetId and URL.""" + resp = await self._client.post( + SHEETS_API_BASE, + json={"properties": {"title": title}}, + ) + resp.raise_for_status() + return resp.json() + + async def get_spreadsheet_metadata(self, spreadsheet_id: str) -> dict[str, Any]: + """Get spreadsheet metadata: title, sheets, dimensions.""" + resp = await self._client.get( + f"{SHEETS_API_BASE}/{spreadsheet_id}", + params={"fields": "properties.title,sheets.properties"}, + ) + resp.raise_for_status() + return resp.json() + + async def list_spreadsheets( + self, + query: str | None = None, + max_results: int = 20, + ) -> list[dict[str, Any]]: + """List the user's Google Sheets via the Drive API. + + Returns a list of dicts with id, name, modifiedTime, and webViewLink. 
+ """ + q = "mimeType='application/vnd.google-apps.spreadsheet' and trashed=false" + if query: + safe_query = re.sub(r"[^a-zA-Z0-9 ]", "", query) + q += f" and name contains '{safe_query}'" + + resp = await self._client.get( + f"{DRIVE_API_BASE}/files", + params={ + "q": q, + "fields": "files(id,name,modifiedTime,webViewLink)", + "orderBy": "modifiedTime desc", + "pageSize": str(max_results), + }, + ) + resp.raise_for_status() + return resp.json().get("files", []) + + +# ── Converters ───────────────────────────────────────────────────────── + + +def values_to_records(values: list[list[str]]) -> list[dict[str, Any]]: + """Convert 2D values (first row = headers) to list of dicts. + + Example: + [["name", "age"], ["Alice", "30"]] -> [{"name": "Alice", "age": "30"}] + """ + if len(values) < 2: + return [] + headers = values[0] + records = [] + for row in values[1:]: + # Pad short rows with empty strings + padded = row + [""] * (len(headers) - len(row)) + records.append(dict(zip(headers, padded))) + return records + + +def records_to_values(records: list[dict[str, Any]]) -> list[list[str]]: + """Convert list of dicts to 2D values (first row = headers). 
+ + Example: + [{"name": "Alice", "age": 30}] -> [["name", "age"], ["Alice", "30"]] + """ + if not records: + return [] + + # Collect all keys in order of first appearance + headers: list[str] = [] + seen: set[str] = set() + for record in records: + for key in record: + if key not in seen: + headers.append(key) + seen.add(key) + + rows = [headers] + for record in records: + rows.append( + [ + str(v) if v is not None else "" + for v in (record.get(h, "") for h in headers) + ] + ) + return rows diff --git a/everyrow-mcp/src/everyrow_mcp/sheets_models.py b/everyrow-mcp/src/everyrow_mcp/sheets_models.py new file mode 100644 index 00000000..3b2c1a55 --- /dev/null +++ b/everyrow-mcp/src/everyrow_mcp/sheets_models.py @@ -0,0 +1,165 @@ +"""Input models for Google Sheets MCP tools.""" + +from __future__ import annotations + +import re +from typing import Any + +from pydantic import BaseModel, ConfigDict, Field, field_validator + +# Matches the 44-char alphanumeric spreadsheet ID in a Google Sheets URL +_SHEETS_URL_RE = re.compile(r"/spreadsheets/d/([a-zA-Z0-9_-]+)") + +# A1 notation range validation +_A1_RANGE_RE = re.compile(r"^[A-Za-z0-9_' !:$]+$") +_MAX_RANGE_LENGTH = 200 + + +def _extract_spreadsheet_id(v: str) -> str: + """Accept a full Google Sheets URL or a bare spreadsheet ID. 
+ + Extracts the ID from URLs like: + https://docs.google.com/spreadsheets/d/1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgVE2upms/edit + and passes through bare IDs like: + 1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgVE2upms + """ + v = v.strip() + m = _SHEETS_URL_RE.search(v) + if m: + return m.group(1) + # Bare ID: must be alphanumeric + hyphens/underscores, typically 44 chars + if re.fullmatch(r"[a-zA-Z0-9_-]+", v) and len(v) >= 10: + return v + raise ValueError( + f"Invalid spreadsheet_id: expected a Google Sheets URL or a bare spreadsheet ID, got {v!r}" + ) + + +def _validate_a1_range(v: str) -> str: + """Validate an A1 notation range string.""" + if len(v) > _MAX_RANGE_LENGTH: + raise ValueError(f"Range too long ({len(v)} chars, max {_MAX_RANGE_LENGTH})") + if not _A1_RANGE_RE.fullmatch(v): + raise ValueError( + "Invalid range: contains disallowed characters. " + "Use A1 notation (e.g. 'Sheet1!A1:D10')." + ) + return v + + +class SheetsReadInput(BaseModel): + """Input for the sheets_read tool.""" + + model_config = ConfigDict(str_strip_whitespace=True, extra="forbid") + + spreadsheet_id: str = Field( + ..., + description="Google Sheets spreadsheet ID or full URL.", + ) + range: str = Field( + default="Sheet1", + description="A1 notation range to read. Examples: 'Sheet1' (entire sheet), " + "'Sheet1!A1:D10' (rectangle), 'Sheet1!B:B' (single column), " + "'Sheet1!1:5' (first 5 rows), 'Sheet2' (different tab). 
" + "Defaults to entire first sheet.", + ) + + @field_validator("spreadsheet_id") + @classmethod + def extract_id(cls, v: str) -> str: + return _extract_spreadsheet_id(v) + + @field_validator("range") + @classmethod + def validate_range(cls, v: str) -> str: + return _validate_a1_range(v) + + +class SheetsWriteInput(BaseModel): + """Input for the sheets_write tool.""" + + model_config = ConfigDict(str_strip_whitespace=True, extra="forbid") + + spreadsheet_id: str = Field( + ..., + description="Google Sheets spreadsheet ID or full URL.", + ) + range: str = Field( + default="Sheet1", + description="A1 notation range to write to. To add columns next to existing data, " + "use the first empty column (e.g. 'Sheet1!E1'). Only the target range is " + "affected — existing data in other columns is preserved.", + ) + data: list[dict[str, Any]] = Field( + ..., + description="Data as a list of dicts (JSON records). Keys become column headers.", + min_length=1, + ) + append: bool = Field( + default=False, + description="If True, append after existing data instead of overwriting.", + ) + confirm_overwrite: bool = Field( + default=False, + description="Must be set to True to overwrite existing data when append=False. 
" + "The tool will check if the range has data and warn you first.", + ) + + @field_validator("spreadsheet_id") + @classmethod + def extract_id(cls, v: str) -> str: + return _extract_spreadsheet_id(v) + + @field_validator("range") + @classmethod + def validate_range(cls, v: str) -> str: + return _validate_a1_range(v) + + +class SheetsCreateInput(BaseModel): + """Input for the sheets_create tool.""" + + model_config = ConfigDict(str_strip_whitespace=True, extra="forbid") + + title: str = Field( + ..., + description="Title for the new spreadsheet.", + min_length=1, + ) + data: list[dict[str, Any]] | None = Field( + default=None, + description="Optional initial data as a list of dicts (JSON records).", + ) + + +class SheetsInfoInput(BaseModel): + """Input for the sheets_info tool.""" + + model_config = ConfigDict(str_strip_whitespace=True, extra="forbid") + + spreadsheet_id: str = Field( + ..., + description="Google Sheets spreadsheet ID or full URL.", + ) + + @field_validator("spreadsheet_id") + @classmethod + def extract_id(cls, v: str) -> str: + return _extract_spreadsheet_id(v) + + +class SheetsListInput(BaseModel): + """Input for the sheets_list tool.""" + + model_config = ConfigDict(str_strip_whitespace=True, extra="forbid") + + query: str | None = Field( + default=None, + description="Optional search query to filter spreadsheets by name (e.g. 'Budget 2024').", + ) + max_results: int = Field( + default=20, + description="Maximum number of spreadsheets to return.", + ge=1, + le=100, + ) diff --git a/everyrow-mcp/src/everyrow_mcp/sheets_tools.py b/everyrow-mcp/src/everyrow_mcp/sheets_tools.py new file mode 100644 index 00000000..b100d077 --- /dev/null +++ b/everyrow-mcp/src/everyrow_mcp/sheets_tools.py @@ -0,0 +1,389 @@ +"""Google Sheets MCP tools for the everyrow MCP server. + +Provides 5 tools: sheets_list, sheets_read, sheets_write, sheets_create, sheets_info. +All tools use the existing FastMCP instance from app.py. 
+""" + +from __future__ import annotations + +import json +import logging + +import httpx +from mcp.server.auth.middleware.auth_context import get_access_token +from mcp.types import TextContent, ToolAnnotations + +from everyrow_mcp.app import mcp +from everyrow_mcp.config import settings +from everyrow_mcp.redis_store import build_key, get_redis_client +from everyrow_mcp.sheets_client import ( + GoogleSheetsClient, + get_google_token, + records_to_values, + values_to_records, +) +from everyrow_mcp.sheets_models import ( + SheetsCreateInput, + SheetsInfoInput, + SheetsListInput, + SheetsReadInput, + SheetsWriteInput, +) + +logger = logging.getLogger(__name__) + + +def _error_message(e: Exception) -> str: + """Format a user-friendly error message from a Google API exception.""" + if isinstance(e, httpx.HTTPStatusError): + status = e.response.status_code + if status == 403: + return "Permission denied. Check that the spreadsheet is shared with you." + if status == 404: + return "Spreadsheet not found. Check the spreadsheet ID or URL." + if status == 429: + return "Rate limited by Google API. Please try again in a moment." + return f"Google API error (HTTP {status}). Please try again." + return f"Sheets operation failed ({type(e).__name__}). Please try again." + + +async def _check_sheets_rate_limit() -> list[TextContent] | None: + """Enforce per-user rate limiting on sheets operations. + + Returns an error response if the rate limit is exceeded, or ``None`` if OK. + Only active in HTTP mode; always returns ``None`` for stdio. + Fail-open if Redis is unavailable. 
+ """ + if not settings.is_http: + return None + + try: + access_token = get_access_token() + user_id = access_token.client_id if access_token else "anonymous" + redis = get_redis_client() + rl_key = build_key("ratelimit", "sheets", user_id) + async with redis.pipeline() as pipe: + pipe.incr(rl_key) + pipe.expire(rl_key, settings.sheets_rate_window, nx=True) + count, _ = await pipe.execute() + if count > settings.sheets_rate_limit: + return [ + TextContent( + type="text", + text="Sheets rate limit exceeded. Please wait before trying again.", + ) + ] + except Exception: + logger.debug("Sheets rate limit check failed (fail-open)", exc_info=True) + return None + + +def _audit_user_id() -> str: + """Best-effort user ID for audit logs.""" + try: + token = get_access_token() + return token.client_id if token else "unknown" + except Exception: + return "unknown" + + +@mcp.tool( + name="sheets_list", + annotations=ToolAnnotations( + title="List Google Sheets", + readOnlyHint=True, + destructiveHint=False, + idempotentHint=True, + openWorldHint=True, + ), +) +async def sheets_list(params: SheetsListInput) -> list[TextContent]: + """List the user's Google Sheets, optionally filtered by name.""" + if denied := await _check_sheets_rate_limit(): + return denied + try: + token = await get_google_token() + async with GoogleSheetsClient(token) as client: + files = await client.list_spreadsheets( + query=params.query, max_results=params.max_results + ) + except Exception as e: + return [TextContent(type="text", text=_error_message(e))] + + if not files: + msg = "No spreadsheets found" + if params.query: + msg += f" matching '{params.query}'" + msg += "." 
+ return [TextContent(type="text", text=msg)] + + return [ + TextContent( + type="text", + text=json.dumps(files, ensure_ascii=False), + ) + ] + + +@mcp.tool( + name="sheets_read", + annotations=ToolAnnotations( + title="Read Google Sheet", + readOnlyHint=True, + destructiveHint=False, + idempotentHint=True, + openWorldHint=True, + ), +) +async def sheets_read(params: SheetsReadInput) -> list[TextContent]: + """Read data from a Google Sheet and return it as JSON records. + + Returns a list of dicts where keys are column headers. The output is + directly compatible with everyrow tools' input_json parameter. + + Example flow: + data = sheets_read(spreadsheet_id="...") -> list[dict] + everyrow_agent(input_json=data, task="Research each company") + sheets_write(spreadsheet_id="...", data=enriched_results) + """ + if denied := await _check_sheets_rate_limit(): + return denied + try: + token = await get_google_token() + async with GoogleSheetsClient(token) as client: + values = await client.read_range( + params.spreadsheet_id, cell_range=params.range + ) + except Exception as e: + return [TextContent(type="text", text=_error_message(e))] + + records = values_to_records(values) + + if not records: + return [ + TextContent( + type="text", + text="The sheet is empty or contains only headers (no data rows).", + ) + ] + + return [ + TextContent( + type="text", + text=json.dumps(records, ensure_ascii=False), + ) + ] + + +@mcp.tool( + name="sheets_write", + annotations=ToolAnnotations( + title="Write to Google Sheet", + readOnlyHint=False, + destructiveHint=True, + idempotentHint=False, + openWorldHint=True, + ), +) +async def sheets_write(params: SheetsWriteInput) -> list[TextContent]: + """Write data to a Google Sheet. + + Accepts a list of dicts (JSON records). Keys become column headers. + Only the specified range is affected — other cells are untouched. + + To add new columns next to existing data, set range to the first empty + column (e.g. 
'Sheet1!E1') and pass only the new columns. You do NOT + need to rewrite the entire sheet. + + Use append=True to add rows after existing data instead of overwriting. + """ + if denied := await _check_sheets_rate_limit(): + return denied + try: + token = await get_google_token() + values = records_to_values(params.data) + + async with GoogleSheetsClient(token) as client: + if params.append: + result = await client.append_range( + params.spreadsheet_id, cell_range=params.range, values=values + ) + updated_range = result.get("updates", {}).get( + "updatedRange", params.range + ) + updated_rows = result.get("updates", {}).get( + "updatedRows", len(params.data) + ) + logger.info( + "AUDIT sheets_write user=%s spreadsheet=%s rows=%s append=true", + _audit_user_id(), + params.spreadsheet_id, + updated_rows, + ) + return [ + TextContent( + type="text", + text=f"Appended {updated_rows} rows to {updated_range}.", + ) + ] + else: + # Pre-check: warn if the target range already has data + if not params.confirm_overwrite: + existing = await client.read_range( + params.spreadsheet_id, cell_range=params.range + ) + if existing: + existing_rows = len(existing) + return [ + TextContent( + type="text", + text=f"The range '{params.range}' already contains {existing_rows} rows " + f"(including headers). Writing will overwrite this data. 
" + f"To proceed, call again with confirm_overwrite=True, " + f"or use append=True to add rows after existing data.", + ) + ] + + result = await client.write_range( + params.spreadsheet_id, cell_range=params.range, values=values + ) + updated_range = result.get("updatedRange", params.range) + updated_rows = result.get("updatedRows", len(params.data) + 1) + logger.info( + "AUDIT sheets_write user=%s spreadsheet=%s rows=%s append=false", + _audit_user_id(), + params.spreadsheet_id, + updated_rows, + ) + return [ + TextContent( + type="text", + text=f"Wrote {updated_rows} rows (including header) to {updated_range}.", + ) + ] + except Exception as e: + return [TextContent(type="text", text=_error_message(e))] + + +@mcp.tool( + name="sheets_create", + annotations=ToolAnnotations( + title="Create Google Sheet", + readOnlyHint=False, + destructiveHint=False, + idempotentHint=False, + openWorldHint=True, + ), +) +async def sheets_create(params: SheetsCreateInput) -> list[TextContent]: + """Create a new Google Sheet, optionally populated with data. + + Returns the spreadsheet ID and URL. + """ + if denied := await _check_sheets_rate_limit(): + return denied + try: + token = await get_google_token() + + async with GoogleSheetsClient(token) as client: + # Duplicate title guard + existing = await client.list_spreadsheets( + query=params.title, max_results=50 + ) + for f in existing: + if f.get("name") == params.title: + return [ + TextContent( + type="text", + text=f"A spreadsheet named '{params.title}' already exists " + f"(id: {f['id']}). 
Pick a different title to avoid " + f"creating a duplicate.", + ) + ] + + metadata = await client.create_spreadsheet(params.title) + spreadsheet_id = metadata["spreadsheetId"] + url = metadata.get( + "spreadsheetUrl", + f"https://docs.google.com/spreadsheets/d/{spreadsheet_id}", + ) + + # Optionally populate with initial data + if params.data: + values = records_to_values(params.data) + await client.write_range(spreadsheet_id, "Sheet1", values) + except Exception as e: + return [TextContent(type="text", text=_error_message(e))] + + logger.info( + "AUDIT sheets_create user=%s spreadsheet=%s rows=%s", + _audit_user_id(), + spreadsheet_id, + len(params.data) if params.data else 0, + ) + + result = { + "spreadsheet_id": spreadsheet_id, + "url": url, + "title": params.title, + } + if params.data: + result["rows_written"] = len(params.data) + + return [ + TextContent( + type="text", + text=json.dumps(result, ensure_ascii=False), + ) + ] + + +@mcp.tool( + name="sheets_info", + annotations=ToolAnnotations( + title="Get Google Sheet Info", + readOnlyHint=True, + destructiveHint=False, + idempotentHint=True, + openWorldHint=True, + ), +) +async def sheets_info(params: SheetsInfoInput) -> list[TextContent]: + """Get metadata about a Google Sheet: title, sheet names, and dimensions.""" + if denied := await _check_sheets_rate_limit(): + return denied + try: + token = await get_google_token() + + async with GoogleSheetsClient(token) as client: + metadata = await client.get_spreadsheet_metadata(params.spreadsheet_id) + except Exception as e: + return [TextContent(type="text", text=_error_message(e))] + + title = metadata.get("properties", {}).get("title", "Unknown") + sheets = [] + for sheet in metadata.get("sheets", []): + props = sheet.get("properties", {}) + grid = props.get("gridProperties", {}) + sheets.append( + { + "name": props.get("title", ""), + "index": props.get("index", 0), + "rows": grid.get("rowCount", 0), + "columns": grid.get("columnCount", 0), + } + ) + + result 
= { + "spreadsheet_id": params.spreadsheet_id, + "title": title, + "url": f"https://docs.google.com/spreadsheets/d/{params.spreadsheet_id}", + "sheets": sheets, + } + + return [ + TextContent( + type="text", + text=json.dumps(result, ensure_ascii=False), + ) + ] diff --git a/everyrow-mcp/src/everyrow_mcp/tools.py b/everyrow-mcp/src/everyrow_mcp/tools.py index 09889b60..8adb18bc 100644 --- a/everyrow-mcp/src/everyrow_mcp/tools.py +++ b/everyrow-mcp/src/everyrow_mcp/tools.py @@ -107,6 +107,71 @@ async def _check_task_ownership(task_id: str) -> list[TextContent] | None: return None +async def _write_results_to_sheet( + df: Any, title: str, preview_size: int = 5 +) -> list[TextContent]: + """Create a new Google Sheet and write the full DataFrame there. + + Raises if a spreadsheet with the same title already exists. + Returns human-readable text with a link to the new sheet. + """ + import pandas as pd # noqa: PLC0415 + + from everyrow_mcp.sheets_client import ( # noqa: PLC0415 + GoogleSheetsClient, + get_google_token, + records_to_values, + ) + from everyrow_mcp.sheets_tools import _check_sheets_rate_limit # noqa: PLC0415 + + if denied := await _check_sheets_rate_limit(): + return denied + + token = await get_google_token() + async with GoogleSheetsClient(token) as client: + # Best-effort duplicate guard (TOCTOU race is inherent to the + # Drive API — two concurrent creates can both pass this check). + existing = await client.list_spreadsheets(query=title, max_results=50) + for f in existing: + if f.get("name") == title: + raise ValueError( + f"A spreadsheet named '{title}' already exists " + f"(id: {f['id']}). Pick a different title to avoid " + f"overwriting existing data." 
+ ) + + # Create and populate + metadata = await client.create_spreadsheet(title) + spreadsheet_id = metadata["spreadsheetId"] + url = metadata.get( + "spreadsheetUrl", + f"https://docs.google.com/spreadsheets/d/{spreadsheet_id}", + ) + + records = df.where(pd.notna(df), None).to_dict(orient="records") + values = records_to_values(records) + await client.write_range(spreadsheet_id, "Sheet1", values) + + total = len(df) + preview = ( + df.head(preview_size) + .where(pd.notna(df.head(preview_size)), None) + .to_dict(orient="records") + ) + summary = f"Created Google Sheet '{title}' with {total} rows.\nURL: {url}" + + widget_data: dict = { + "preview": preview, + "total": total, + "spreadsheet_url": url, + } + + return [ + TextContent(type="text", text=json.dumps(widget_data)), + TextContent(type="text", text=summary), + ] + + @mcp.tool( name="everyrow_browse_lists", structured_output=False, @@ -1041,7 +1106,11 @@ async def everyrow_results_stdio( """Retrieve results from a completed everyrow task and save them to a CSV. Only call this after everyrow_progress reports status 'completed'. + Pass output_path (ending in .csv) to save results as a local CSV file. + Optionally pass output_spreadsheet_title to create a new Google Sheet with + the full results. This always creates a new sheet — it refuses to overwrite + an existing sheet with the same title. """ client = _get_client(ctx) task_id = params.task_id @@ -1081,15 +1150,16 @@ async def everyrow_results_stdio( ] -async def everyrow_results_http( +async def everyrow_results_http( # noqa: PLR0911 params: HttpResultsInput, ctx: EveryRowContext ) -> list[TextContent]: """Retrieve results from a completed everyrow task. Only call this after everyrow_progress reports status 'completed'. - The user always has access to all rows via the widget — page_size only - controls how many rows _you_ can read. - After results load, tell the user how many rows you can see vs the total. 
+ Results are returned as a paginated preview with a download link. + Optionally pass output_spreadsheet_title to create a new Google Sheet with + the full results. This always creates a new sheet — it refuses to overwrite + an existing sheet with the same title. """ client = _get_client(ctx) task_id = params.task_id @@ -1148,6 +1218,19 @@ async def everyrow_results_http( ) ] + # ── Google Sheets output ───────────────────────────────────── + if params.output_spreadsheet_title: + try: + return await _write_results_to_sheet(df, params.output_spreadsheet_title) + except Exception as e: + logger.exception("Failed to write results to Google Sheet") + return [ + TextContent( + type="text", + text=f"Failed to write results to Google Sheet ({type(e).__name__}). Please try again.", + ) + ] + # output_path is accepted by the schema but ignored in HTTP mode — # the server must not write to its own filesystem on remote request. diff --git a/everyrow-mcp/src/everyrow_mcp/utils.py b/everyrow-mcp/src/everyrow_mcp/utils.py index 7bd26e10..d3820cbe 100644 --- a/everyrow-mcp/src/everyrow_mcp/utils.py +++ b/everyrow-mcp/src/everyrow_mcp/utils.py @@ -144,6 +144,11 @@ async def _validate_url_target(url: str) -> None: await _resolve_and_validate(hostname) +def _is_google_url(url: str) -> bool: + """Check if a URL points to Google Sheets or Drive.""" + return "docs.google.com" in url or "drive.google.com" in url + + def is_url(value: str) -> bool: """Check if a string looks like an HTTP(S) URL.""" return value.startswith("http://") or value.startswith("https://") @@ -278,6 +283,7 @@ async def fetch_csv_from_url(url: str) -> pd.DataFrame: """Fetch CSV data from a URL and return a DataFrame. Automatically normalises Google Sheets URLs to their CSV export endpoint. + Authenticates Google URLs with the user's token when available. Validates that the URL (and any redirects) do not target internal networks. 
Raises: @@ -287,6 +293,17 @@ async def fetch_csv_from_url(url: str) -> pd.DataFrame: url = _normalise_google_sheets_url(url) await _validate_url_target(url) + + # Authenticate Google URLs with the user's OAuth token + headers: dict[str, str] = {} + if _is_google_url(url): + try: + from everyrow_mcp.sheets_client import get_google_token # noqa: PLC0415 + + token = await get_google_token() + headers["Authorization"] = f"Bearer {token}" + except Exception: + logger.debug("No Google token available, fetching without auth") + async with httpx.AsyncClient( transport=_SSRFSafeTransport(), follow_redirects=True, event_hooks={"response": [_check_redirect]}, ) as client: # Stream the response to enforce a size limit before buffering - async with client.stream("GET", url) as response: + async with client.stream("GET", url, headers=headers) as response: response.raise_for_status() content_length = response.headers.get("content-length") if content_length and int(content_length) > settings.max_fetch_size_bytes: diff --git a/everyrow-mcp/tests/test_mcp_e2e.py b/everyrow-mcp/tests/test_mcp_e2e.py index 660ee564..0cb61a11 100644 --- a/everyrow-mcp/tests/test_mcp_e2e.py +++ b/everyrow-mcp/tests/test_mcp_e2e.py @@ -28,7 +28,8 @@ from mcp.shared.memory import create_connected_server_and_client_session from mcp.types import TextContent -# Import tools module to trigger @mcp.tool() registration on the FastMCP instance +# Import tools modules to trigger @mcp.tool() registration on the FastMCP instance +import everyrow_mcp.sheets_tools  # noqa: F401 import everyrow_mcp.tools  # noqa: F401 from everyrow_mcp import redis_store from everyrow_mcp.app import mcp as mcp_app @@ -69,7 +70,11 @@ def _http_state(fake_redis): )(everyrow_results_http) with ( - override_settings(transport="streamable-http", upload_secret="test-secret"), + override_settings( + transport="streamable-http", + upload_secret="test-secret", + 
enable_sheets_tools=True, + ), patch.object(redis_store, "get_redis_client", return_value=fake_redis), patch("everyrow_mcp.tools.get_access_token", _fake_access_token), patch("everyrow_mcp.tool_helpers.get_access_token", _fake_access_token), @@ -166,7 +171,7 @@ class TestMcpProtocol: @pytest.mark.asyncio async def test_list_tools(self, _http_state): - """list_tools returns all registered tools (including upload_data).""" + """list_tools returns all registered tools.""" async with mcp_client() as session: result = await session.list_tools() tool_names = sorted(t.name for t in result.tools) @@ -188,6 +193,11 @@ async def test_list_tools(self, _http_state): "everyrow_single_agent", "everyrow_upload_data", "everyrow_use_list", + "sheets_list", + "sheets_read", + "sheets_write", + "sheets_create", + "sheets_info", ] ) assert tool_names == expected diff --git a/everyrow-mcp/tests/test_sheets_tools.py b/everyrow-mcp/tests/test_sheets_tools.py new file mode 100644 index 00000000..328f6665 --- /dev/null +++ b/everyrow-mcp/tests/test_sheets_tools.py @@ -0,0 +1,814 @@ +"""Tests for Google Sheets MCP tools. + +All Google Sheets API calls are mocked via httpx responses. 
+""" + +from __future__ import annotations + +import json +from typing import Any +from unittest.mock import AsyncMock, patch + +import httpx +import pytest + +from everyrow_mcp.sheets_client import ( + GoogleSheetsClient, + records_to_values, + values_to_records, +) +from everyrow_mcp.sheets_models import ( + SheetsCreateInput, + SheetsInfoInput, + SheetsListInput, + SheetsReadInput, + SheetsWriteInput, + _extract_spreadsheet_id, +) +from everyrow_mcp.sheets_tools import ( + _error_message, + sheets_create, + sheets_info, + sheets_list, + sheets_read, + sheets_write, +) + + +@pytest.fixture(autouse=True) +def _no_rate_limit(): + """Disable rate limiting for all tool tests.""" + with patch( + "everyrow_mcp.sheets_tools._check_sheets_rate_limit", + new_callable=AsyncMock, + return_value=None, + ): + yield + + +# ── Model validation tests ─────────────────────────────────────────── + + +class TestSpreadsheetIdExtraction: + def test_bare_id(self): + bare = "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgVE2upms" + assert _extract_spreadsheet_id(bare) == bare + + def test_full_url(self): + url = "https://docs.google.com/spreadsheets/d/1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgVE2upms/edit#gid=0" + assert ( + _extract_spreadsheet_id(url) + == "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgVE2upms" + ) + + def test_url_without_edit(self): + url = "https://docs.google.com/spreadsheets/d/1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgVE2upms" + assert ( + _extract_spreadsheet_id(url) + == "1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgVE2upms" + ) + + def test_invalid_id_too_short(self): + with pytest.raises(ValueError, match="Invalid spreadsheet_id"): + _extract_spreadsheet_id("short") + + def test_invalid_id_special_chars(self): + with pytest.raises(ValueError, match="Invalid spreadsheet_id"): + _extract_spreadsheet_id("not a valid id!@#$") + + def test_whitespace_stripped(self): + bare = " 1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgVE2upms " + assert ( + _extract_spreadsheet_id(bare) + == 
"1BxiMVs0XRA5nFMdKvBdBZjgmUUqptlbs74OgVE2upms" + ) + + +class TestSheetsReadInput: + def test_url_extraction(self): + inp = SheetsReadInput( + spreadsheet_id="https://docs.google.com/spreadsheets/d/abc123def456ghi789jkl012mno345pqr678stu901v" + ) + assert inp.spreadsheet_id == "abc123def456ghi789jkl012mno345pqr678stu901v" + + def test_default_range(self): + inp = SheetsReadInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v" + ) + assert inp.range == "Sheet1" + + def test_custom_range(self): + inp = SheetsReadInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v", + range="Sheet2!A1:D10", + ) + assert inp.range == "Sheet2!A1:D10" + + +class TestSheetsWriteInput: + def test_valid_input(self): + inp = SheetsWriteInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v", + data=[{"name": "Alice", "age": "30"}], + ) + assert inp.append is False + + def test_append_flag(self): + inp = SheetsWriteInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v", + data=[{"name": "Alice"}], + append=True, + ) + assert inp.append is True + + def test_empty_data_rejected(self): + with pytest.raises(Exception): + SheetsWriteInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v", + data=[], + ) + + +class TestSheetsCreateInput: + def test_title_required(self): + with pytest.raises(Exception): + SheetsCreateInput(title="") + + def test_optional_data(self): + inp = SheetsCreateInput(title="My Sheet") + assert inp.data is None + + def test_with_data(self): + inp = SheetsCreateInput(title="My Sheet", data=[{"col": "val"}]) + assert inp.data == [{"col": "val"}] + + +class TestSheetsInfoInput: + def test_url_extraction(self): + inp = SheetsInfoInput( + spreadsheet_id="https://docs.google.com/spreadsheets/d/abc123def456ghi789jkl012mno345pqr678stu901v/edit" + ) + assert inp.spreadsheet_id == "abc123def456ghi789jkl012mno345pqr678stu901v" + + +# ── Converter tests ────────────────────────────────────────────────── + + 
+class TestValuesToRecords: + def test_basic_conversion(self): + values = [["name", "age"], ["Alice", "30"], ["Bob", "25"]] + records = values_to_records(values) + assert records == [ + {"name": "Alice", "age": "30"}, + {"name": "Bob", "age": "25"}, + ] + + def test_empty_sheet(self): + assert values_to_records([]) == [] + + def test_headers_only(self): + assert values_to_records([["name", "age"]]) == [] + + def test_short_rows_padded(self): + values = [["name", "age", "city"], ["Alice"]] + records = values_to_records(values) + assert records == [{"name": "Alice", "age": "", "city": ""}] + + +class TestRecordsToValues: + def test_basic_conversion(self): + records = [{"name": "Alice", "age": 30}] + values = records_to_values(records) + assert values == [["name", "age"], ["Alice", "30"]] + + def test_empty_records(self): + assert records_to_values([]) == [] + + def test_preserves_key_order(self): + records = [{"z": "1", "a": "2"}, {"z": "3", "a": "4"}] + values = records_to_values(records) + assert values[0] == ["z", "a"] + + def test_missing_keys_become_empty(self): + records = [{"a": "1", "b": "2"}, {"a": "3"}] + values = records_to_values(records) + assert values[2] == ["3", ""] + + def test_none_values_become_empty(self): + """None values (e.g. 
from pandas NaN) should become empty strings, not 'None'.""" + records = [{"name": "Alice", "age": None}, {"name": None, "age": "30"}] + values = records_to_values(records) + assert values[1] == ["Alice", ""] + assert values[2] == ["", "30"] + + +# ── Client tests (mocked httpx) ───────────────────────────────────── + + +def _mock_response(data: Any, status: int = 200) -> httpx.Response: + return httpx.Response( + status_code=status, + json=data, + request=httpx.Request("GET", "https://example.com"), + ) + + +class TestGoogleSheetsClient: + @pytest.mark.asyncio + async def test_read_range(self): + expected_values = [["name", "age"], ["Alice", "30"]] + mock_resp = _mock_response({"values": expected_values}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ): + async with GoogleSheetsClient("fake-token") as client: + result = await client.read_range("sheet-id", "Sheet1") + assert result == expected_values + + @pytest.mark.asyncio + async def test_read_range_empty(self): + mock_resp = _mock_response({}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ): + async with GoogleSheetsClient("fake-token") as client: + result = await client.read_range("sheet-id", "Sheet1") + assert result == [] + + @pytest.mark.asyncio + async def test_write_range(self): + mock_resp = _mock_response( + { + "updatedRange": "Sheet1!A1:B3", + "updatedRows": 3, + } + ) + + with patch.object( + httpx.AsyncClient, "put", new_callable=AsyncMock, return_value=mock_resp + ): + async with GoogleSheetsClient("fake-token") as client: + result = await client.write_range( + "sheet-id", "Sheet1", [["a", "b"], ["1", "2"]] + ) + assert result["updatedRows"] == 3 + + @pytest.mark.asyncio + async def test_append_range(self): + mock_resp = _mock_response( + { + "updates": { + "updatedRange": "Sheet1!A4:B5", + "updatedRows": 2, + } + } + ) + + with patch.object( + httpx.AsyncClient, "post", 
new_callable=AsyncMock, return_value=mock_resp + ): + async with GoogleSheetsClient("fake-token") as client: + result = await client.append_range("sheet-id", "Sheet1", [["1", "2"]]) + assert result["updates"]["updatedRows"] == 2 + + @pytest.mark.asyncio + async def test_create_spreadsheet(self): + mock_resp = _mock_response( + { + "spreadsheetId": "new-id-123", + "spreadsheetUrl": "https://docs.google.com/spreadsheets/d/new-id-123", + } + ) + + with patch.object( + httpx.AsyncClient, "post", new_callable=AsyncMock, return_value=mock_resp + ): + async with GoogleSheetsClient("fake-token") as client: + result = await client.create_spreadsheet("Test Sheet") + assert result["spreadsheetId"] == "new-id-123" + + @pytest.mark.asyncio + async def test_get_spreadsheet_metadata(self): + mock_resp = _mock_response( + { + "properties": {"title": "My Sheet"}, + "sheets": [ + { + "properties": { + "title": "Sheet1", + "index": 0, + "gridProperties": {"rowCount": 100, "columnCount": 26}, + } + } + ], + } + ) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ): + async with GoogleSheetsClient("fake-token") as client: + result = await client.get_spreadsheet_metadata("sheet-id") + assert result["properties"]["title"] == "My Sheet" + assert result["sheets"][0]["properties"]["title"] == "Sheet1" + + +# ── Tool integration tests (mock token + httpx) ───────────────────── + + +@pytest.fixture +def _mock_google_token(): + """Patch get_google_token to return a fake token.""" + with patch( + "everyrow_mcp.sheets_tools.get_google_token", + new_callable=AsyncMock, + return_value="fake-google-token", + ) as m: + yield m + + +class TestSheetsReadTool: + @pytest.mark.asyncio + async def test_returns_json_records(self, _mock_google_token): + values = [["name", "age"], ["Alice", "30"], ["Bob", "25"]] + mock_resp = _mock_response({"values": values}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + 
): + result = await sheets_read( + SheetsReadInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v" + ) + ) + + assert len(result) == 1 + data = json.loads(result[0].text) + assert data == [{"name": "Alice", "age": "30"}, {"name": "Bob", "age": "25"}] + + @pytest.mark.asyncio + async def test_empty_sheet(self, _mock_google_token): + mock_resp = _mock_response({}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ): + result = await sheets_read( + SheetsReadInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v" + ) + ) + + assert "empty" in result[0].text.lower() + + @pytest.mark.asyncio + async def test_url_extraction(self, _mock_google_token): + values = [["x"], ["1"]] + mock_resp = _mock_response({"values": values}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ) as mock_get: + inp = SheetsReadInput( + spreadsheet_id="https://docs.google.com/spreadsheets/d/abc123def456ghi789jkl012mno345pqr678stu901v/edit" + ) + await sheets_read(inp) + + # Verify the extracted ID was used in the API call + call_url = mock_get.call_args[0][0] + assert "abc123def456ghi789jkl012mno345pqr678stu901v" in call_url + assert "docs.google.com" not in call_url + + +class TestSheetsWriteTool: + @pytest.mark.asyncio + async def test_write_overwrite_confirmed(self, _mock_google_token): + mock_resp = _mock_response( + { + "updatedRange": "Sheet1!A1:B3", + "updatedRows": 3, + } + ) + + with patch.object( + httpx.AsyncClient, "put", new_callable=AsyncMock, return_value=mock_resp + ): + result = await sheets_write( + SheetsWriteInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v", + data=[{"name": "Alice"}, {"name": "Bob"}], + confirm_overwrite=True, + ) + ) + + assert "Wrote" in result[0].text + + @pytest.mark.asyncio + async def test_write_overwrite_warns_if_existing_data(self, _mock_google_token): + """Writing without confirm_overwrite warns when 
range has data.""" + read_resp = _mock_response({"values": [["name"], ["Alice"]]}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=read_resp + ): + result = await sheets_write( + SheetsWriteInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v", + data=[{"name": "Bob"}], + ) + ) + + assert "already contains" in result[0].text + assert "confirm_overwrite" in result[0].text + + @pytest.mark.asyncio + async def test_write_overwrite_proceeds_on_empty_range(self, _mock_google_token): + """Writing without confirm_overwrite proceeds when range is empty.""" + read_resp = _mock_response({}) # empty range + write_resp = _mock_response({"updatedRange": "Sheet1!A1:B2", "updatedRows": 2}) + + with ( + patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=read_resp + ), + patch.object( + httpx.AsyncClient, + "put", + new_callable=AsyncMock, + return_value=write_resp, + ), + ): + result = await sheets_write( + SheetsWriteInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v", + data=[{"name": "Bob"}], + ) + ) + + assert "Wrote" in result[0].text + + @pytest.mark.asyncio + async def test_write_append(self, _mock_google_token): + mock_resp = _mock_response( + { + "updates": { + "updatedRange": "Sheet1!A4:B5", + "updatedRows": 2, + } + } + ) + + with patch.object( + httpx.AsyncClient, "post", new_callable=AsyncMock, return_value=mock_resp + ): + result = await sheets_write( + SheetsWriteInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v", + data=[{"name": "Alice"}], + append=True, + ) + ) + + assert "Appended" in result[0].text + + +class TestSheetsCreateTool: + @pytest.mark.asyncio + async def test_create_empty(self, _mock_google_token): + list_resp = _mock_response({"files": []}) # no duplicates + create_resp = _mock_response( + { + "spreadsheetId": "new-id-123", + "spreadsheetUrl": "https://docs.google.com/spreadsheets/d/new-id-123", + } + ) + + with ( + patch.object( 
+ httpx.AsyncClient, + "get", + new_callable=AsyncMock, + return_value=list_resp, + ), + patch.object( + httpx.AsyncClient, + "post", + new_callable=AsyncMock, + return_value=create_resp, + ), + ): + result = await sheets_create(SheetsCreateInput(title="Test")) + + data = json.loads(result[0].text) + assert data["spreadsheet_id"] == "new-id-123" + assert "url" in data + assert "rows_written" not in data + + @pytest.mark.asyncio + async def test_create_with_data(self, _mock_google_token): + list_resp = _mock_response({"files": []}) # no duplicates + create_resp = _mock_response( + { + "spreadsheetId": "new-id-456", + "spreadsheetUrl": "https://docs.google.com/spreadsheets/d/new-id-456", + } + ) + write_resp = _mock_response({"updatedRows": 2}) + + with ( + patch.object( + httpx.AsyncClient, + "get", + new_callable=AsyncMock, + return_value=list_resp, + ), + patch.object( + httpx.AsyncClient, + "post", + new_callable=AsyncMock, + return_value=create_resp, + ), + patch.object( + httpx.AsyncClient, + "put", + new_callable=AsyncMock, + return_value=write_resp, + ), + ): + result = await sheets_create( + SheetsCreateInput(title="Test", data=[{"col": "val"}]) + ) + + data = json.loads(result[0].text) + assert data["rows_written"] == 1 + + @pytest.mark.asyncio + async def test_create_rejects_duplicate_title(self, _mock_google_token): + """sheets_create warns when a spreadsheet with the same title exists.""" + list_resp = _mock_response({"files": [{"id": "existing-id", "name": "Budget"}]}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=list_resp + ): + result = await sheets_create(SheetsCreateInput(title="Budget")) + + assert "already exists" in result[0].text + assert "existing-id" in result[0].text + + +class TestSheetsInfoTool: + @pytest.mark.asyncio + async def test_returns_metadata(self, _mock_google_token): + mock_resp = _mock_response( + { + "properties": {"title": "Budget 2024"}, + "sheets": [ + { + "properties": { + "title": 
"Sheet1", + "index": 0, + "gridProperties": {"rowCount": 100, "columnCount": 10}, + } + }, + { + "properties": { + "title": "Summary", + "index": 1, + "gridProperties": {"rowCount": 50, "columnCount": 5}, + } + }, + ], + } + ) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ): + result = await sheets_info( + SheetsInfoInput( + spreadsheet_id="abc123def456ghi789jkl012mno345pqr678stu901v" + ) + ) + + data = json.loads(result[0].text) + assert data["title"] == "Budget 2024" + assert len(data["sheets"]) == 2 + assert data["sheets"][0]["name"] == "Sheet1" + assert data["sheets"][0]["rows"] == 100 + assert data["sheets"][1]["name"] == "Summary" + + +class TestSheetsListTool: + @pytest.mark.asyncio + async def test_returns_files(self, _mock_google_token): + files = [ + { + "id": "abc123", + "name": "Budget 2024", + "modifiedTime": "2024-06-01T12:00:00Z", + "webViewLink": "https://docs.google.com/spreadsheets/d/abc123/edit", + }, + { + "id": "def456", + "name": "Contacts", + "modifiedTime": "2024-05-15T09:00:00Z", + "webViewLink": "https://docs.google.com/spreadsheets/d/def456/edit", + }, + ] + mock_resp = _mock_response({"files": files}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ): + result = await sheets_list(SheetsListInput()) + + data = json.loads(result[0].text) + assert len(data) == 2 + assert data[0]["name"] == "Budget 2024" + assert data[1]["id"] == "def456" + + @pytest.mark.asyncio + async def test_empty_results(self, _mock_google_token): + mock_resp = _mock_response({"files": []}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ): + result = await sheets_list(SheetsListInput()) + + assert "No spreadsheets found" in result[0].text + + @pytest.mark.asyncio + async def test_with_query(self, _mock_google_token): + mock_resp = _mock_response({"files": []}) + + with patch.object( + httpx.AsyncClient, "get", 
new_callable=AsyncMock, return_value=mock_resp + ) as mock_get: + result = await sheets_list(SheetsListInput(query="Budget")) + + assert "Budget" in result[0].text + # Verify the query was included in the Drive API call + call_params = mock_get.call_args[1]["params"] + assert "Budget" in call_params["q"] + + @pytest.mark.asyncio + async def test_max_results(self, _mock_google_token): + mock_resp = _mock_response({"files": []}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ) as mock_get: + await sheets_list(SheetsListInput(max_results=5)) + + call_params = mock_get.call_args[1]["params"] + assert call_params["pageSize"] == "5" + + +# ── Range validation tests (M1) ───────────────────────────────────── + + +class TestRangeValidation: + """Test A1 notation range validation on SheetsReadInput and SheetsWriteInput.""" + + _VALID_ID = "abc123def456ghi789jkl012mno345pqr678stu901v" + + def test_simple_range(self): + inp = SheetsReadInput(spreadsheet_id=self._VALID_ID, range="Sheet1!A1:D10") + assert inp.range == "Sheet1!A1:D10" + + def test_sheet_name_only(self): + inp = SheetsReadInput(spreadsheet_id=self._VALID_ID, range="Sheet1") + assert inp.range == "Sheet1" + + def test_quoted_sheet_name(self): + inp = SheetsReadInput(spreadsheet_id=self._VALID_ID, range="'My Sheet'!A1:B5") + assert inp.range == "'My Sheet'!A1:B5" + + def test_absolute_refs(self): + inp = SheetsReadInput(spreadsheet_id=self._VALID_ID, range="Sheet1!$A$1:$D$10") + assert inp.range == "Sheet1!$A$1:$D$10" + + def test_column_range(self): + inp = SheetsReadInput(spreadsheet_id=self._VALID_ID, range="Sheet1!B:B") + assert inp.range == "Sheet1!B:B" + + def test_rejects_url_significant_chars(self): + with pytest.raises(Exception, match="Invalid range"): + SheetsReadInput(spreadsheet_id=self._VALID_ID, range="Sheet1/../etc/passwd") + + def test_rejects_path_traversal(self): + with pytest.raises(Exception, match="Invalid range"): + 
SheetsReadInput(spreadsheet_id=self._VALID_ID, range="../../secret") + + def test_rejects_semicolons(self): + with pytest.raises(Exception, match="Invalid range"): + SheetsReadInput(spreadsheet_id=self._VALID_ID, range="Sheet1;DROP TABLE") + + def test_rejects_too_long(self): + with pytest.raises(Exception, match="Range too long"): + SheetsReadInput(spreadsheet_id=self._VALID_ID, range="A" * 201) + + def test_write_input_validates_too(self): + with pytest.raises(Exception, match="Invalid range"): + SheetsWriteInput( + spreadsheet_id=self._VALID_ID, + range="Sheet1/../hack", + data=[{"a": "1"}], + ) + + def test_write_input_valid(self): + inp = SheetsWriteInput( + spreadsheet_id=self._VALID_ID, + range="Sheet1!A1:B5", + data=[{"a": "1"}], + ) + assert inp.range == "Sheet1!A1:B5" + + +# ── Error message sanitization tests (H1) ──────────────────────────── + + +class TestErrorMessageSanitization: + """Ensure error messages don't leak internal details.""" + + def test_http_500_no_response_body(self): + """HTTP 500 error should not include response body.""" + resp = httpx.Response( + status_code=500, + text="Internal server error with secret details", + request=httpx.Request("GET", "https://sheets.googleapis.com/test"), + ) + exc = httpx.HTTPStatusError("error", request=resp.request, response=resp) + msg = _error_message(exc) + assert "secret details" not in msg + assert "500" in msg + assert "Please try again" in msg + + def test_catchall_no_repr(self): + """Catch-all should not include full repr of the exception.""" + exc = RuntimeError("sensitive internal state: token=abc123") + msg = _error_message(exc) + assert "sensitive internal state" not in msg + assert "token=abc123" not in msg + assert "RuntimeError" in msg + assert "Please try again" in msg + + def test_known_statuses_unchanged(self): + """403/404/429 messages should remain user-friendly.""" + for status, keyword in [ + (403, "Permission"), + (404, "not found"), + (429, "Rate limited"), + ]: + resp = 
httpx.Response( + status_code=status, + text="details", + request=httpx.Request("GET", "https://example.com"), + ) + exc = httpx.HTTPStatusError("err", request=resp.request, response=resp) + msg = _error_message(exc) + assert keyword in msg + assert "details" not in msg + + +# ── Drive query sanitization tests (M6) ────────────────────────────── + + +class TestDriveQuerySanitization: + """Ensure special characters are stripped from Drive API queries.""" + + @pytest.mark.asyncio + async def test_special_chars_stripped(self): + """Quotes and special chars should be removed from the query.""" + mock_resp = _mock_response({"files": []}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ) as mock_get: + async with GoogleSheetsClient("fake-token") as client: + await client.list_spreadsheets(query="Budget' OR 1=1--") + + call_params = mock_get.call_args[1]["params"] + q = call_params["q"] + # Extract just the user query part from: ... name contains 'SANITIZED' + # The sanitized result should be "Budget OR 11" (only alphanum + spaces) + assert "name contains 'Budget OR 11'" in q + # Injection chars must not survive + assert "1=1--" not in q + + @pytest.mark.asyncio + async def test_clean_query_passes_through(self): + """Alphanumeric queries with spaces should pass through.""" + mock_resp = _mock_response({"files": []}) + + with patch.object( + httpx.AsyncClient, "get", new_callable=AsyncMock, return_value=mock_resp + ) as mock_get: + async with GoogleSheetsClient("fake-token") as client: + await client.list_spreadsheets(query="Budget 2024") + + call_params = mock_get.call_args[1]["params"] + assert "Budget 2024" in call_params["q"] diff --git a/everyrow-mcp/tests/test_stdio_content.py b/everyrow-mcp/tests/test_stdio_content.py index bc57636a..f04dc739 100644 --- a/everyrow-mcp/tests/test_stdio_content.py +++ b/everyrow-mcp/tests/test_stdio_content.py @@ -45,6 +45,7 @@ from everyrow_mcp.models import ( AgentInput, 
DedupeInput, + HttpResultsInput, MergeInput, ProgressInput, RankInput, @@ -579,6 +580,16 @@ async def test_results_api_error(self, tmp_path: Path): class TestToolSchemas: """Verify tool schemas expose the expected fields.""" + def test_http_results_schema_includes_output_spreadsheet_title(self): + """HttpResultsInput schema includes output_spreadsheet_title for Google Sheets export.""" + schema = HttpResultsInput.model_json_schema() + assert "output_spreadsheet_title" in schema["properties"] + + def test_stdio_results_schema_excludes_output_spreadsheet_title(self): + """StdioResultsInput must not expose output_spreadsheet_title (requires HTTP OAuth).""" + schema = StdioResultsInput.model_json_schema() + assert "output_spreadsheet_title" not in schema["properties"] + @pytest.mark.parametrize( "tool_name,def_name", [