From f3a3f23bf0a63f5c4ec1b5bd427429a03f739cc5 Mon Sep 17 00:00:00 2001 From: Dan Schwarz Date: Thu, 26 Feb 2026 19:56:34 -0800 Subject: [PATCH 1/6] Add classify SDK & MCP endpoints - Add classify() and classify_async() to SDK ops.py - Add ClassifyInput model to MCP models.py - Add everyrow_classify MCP tool to tools.py - Add classify entry to manifest.json - Add docs/reference/CLASSIFY.md and update docs/api.md - Add integration test for classify Co-Authored-By: Claude Opus 4.6 --- docs/api.md | 18 ++++- docs/reference/CLASSIFY.md | 86 ++++++++++++++++++++ everyrow-mcp/manifest.json | 4 + everyrow-mcp/src/everyrow_mcp/models.py | 25 ++++++ everyrow-mcp/src/everyrow_mcp/tools.py | 85 +++++++++++++++++++- src/everyrow/ops.py | 100 ++++++++++++++++++++++++ tests/integration/test_classify.py | 65 +++++++++++++++ 7 files changed, 380 insertions(+), 3 deletions(-) create mode 100644 docs/reference/CLASSIFY.md create mode 100644 tests/integration/test_classify.py diff --git a/docs/api.md b/docs/api.md index f777fa75..841d1dca 100644 --- a/docs/api.md +++ b/docs/api.md @@ -1,11 +1,11 @@ --- title: API Reference -description: Complete API reference for everyrow — screen, rank, dedupe, merge, forecast, and research operations powered by LLM web research agents. +description: Complete API reference for everyrow — screen, rank, dedupe, merge, classify, forecast, and research operations powered by LLM web research agents. --- # API Reference -Six operations for processing data with LLM-powered web research agents. Each takes a DataFrame and a natural-language instruction. +Seven operations for processing data with LLM-powered web research agents. Each takes a DataFrame and a natural-language instruction. 
## screen @@ -55,6 +55,20 @@ result = await merge(task=..., left_table=df1, right_table=df2) Guides: [Fuzzy Join Without Matching Keys](/docs/fuzzy-join-without-keys) Case Studies: [LLM Merging at Scale](/docs/case-studies/llm-powered-merging-at-scale), [Match Software Vendors to Requirements](/docs/case-studies/match-software-vendors-to-requirements) +## classify + +```python +result = await classify( + task="Classify each company by its primary industry sector", + categories=["Technology", "Finance", "Healthcare", "Energy"], + input=companies_df, +) +``` + +`classify` assigns each row in a DataFrame to one of the provided categories using a two-phase approach: Phase 1 attempts fast batch classification with web research, and Phase 2 follows up with deeper research on ambiguous rows. Supports binary (yes/no) and multi-category classification with optional reasoning output. + +[Full reference →](/docs/reference/CLASSIFY) + ## forecast ```python diff --git a/docs/reference/CLASSIFY.md b/docs/reference/CLASSIFY.md new file mode 100644 index 00000000..111773a8 --- /dev/null +++ b/docs/reference/CLASSIFY.md @@ -0,0 +1,86 @@ +--- +title: classify +description: API reference for the EveryRow classify tool, which assigns each row of a dataset into one of the provided categories using web research. +--- + +# Classify + +`classify` takes a DataFrame and a list of allowed categories, then assigns each row to exactly one category. Uses a two-phase approach: Phase 1 attempts fast batch classification with web research, and Phase 2 follows up with deeper research on ambiguous rows. 
+ +## Examples + +```python +from pandas import DataFrame +from everyrow.ops import classify + +companies = DataFrame([ + {"company": "Apple Inc.", "description": "Consumer electronics and software"}, + {"company": "JPMorgan Chase", "description": "Investment banking and financial services"}, + {"company": "ExxonMobil", "description": "Oil and gas exploration and production"}, +]) + +result = await classify( + task="Classify each company by its primary industry sector", + categories=["Technology", "Finance", "Healthcare", "Energy"], + input=companies, +) +print(result.data[["company", "classification"]]) +``` + +The output DataFrame contains the original columns plus the classification column (default name: `classification`). + +### Binary classification + +For yes/no questions, use two categories: + +```python +result = await classify( + task="Is this company founder-led?", + categories=["yes", "no"], + input=companies, +) +``` + +### Custom output column and reasoning + +```python +result = await classify( + task="Classify each company by its primary industry sector", + categories=["Technology", "Finance", "Healthcare", "Energy"], + input=companies, + classification_field="sector", + include_reasoning=True, +) +print(result.data[["company", "sector", "reasoning"]]) +``` + +## Parameters + +| Name | Type | Default | Description | +|------|------|---------|-------------| +| `task` | str | required | Natural-language instructions describing how to classify each row | +| `categories` | list[str] | required | Allowed category values (minimum 2). Each row is assigned exactly one. 
| +| `input` | DataFrame | required | Rows to classify | +| `classification_field` | str | `"classification"` | Name of the output column for the assigned category | +| `include_reasoning` | bool | `False` | If True, adds a `reasoning` column with the agent's justification | +| `session` | Session | `None` | Optional session; one is created automatically if omitted | + +## Output + +The assigned-category column is added to each input row (name controlled by `classification_field`); a `reasoning` column is also added when `include_reasoning=True`: + +| Column | Type | Description | +|--------|------|-------------| +| `classification` | str | One of the provided `categories` values | +| `reasoning` | str | Agent's justification (only if `include_reasoning=True`) | + +## Via MCP + +MCP tool: `everyrow_classify` + +| Parameter | Type | Description | +|-----------|------|-------------| +| `task` | string | Classification instructions | +| `categories` | list[string] | Allowed categories (minimum 2) | +| `classification_field` | string | Output column name (default: `"classification"`) | +| `include_reasoning` | boolean | Include reasoning column (default: false) | diff --git a/everyrow-mcp/manifest.json b/everyrow-mcp/manifest.json index f416028c..697e640f 100644 --- a/everyrow-mcp/manifest.json +++ b/everyrow-mcp/manifest.json @@ -53,6 +53,10 @@ "name": "everyrow_forecast", "description": "Forecast the probability of binary questions from a CSV file." }, + { + "name": "everyrow_classify", + "description": "Classify each row of a dataset into one of the provided categories." + }, { "name": "everyrow_single_agent", "description": "Run a single web research agent on a task, optionally with context data."
diff --git a/everyrow-mcp/src/everyrow_mcp/models.py b/everyrow-mcp/src/everyrow_mcp/models.py index 468f71a2..da3b6fcb 100644 --- a/everyrow-mcp/src/everyrow_mcp/models.py +++ b/everyrow-mcp/src/everyrow_mcp/models.py @@ -465,6 +465,31 @@ class ForecastInput(_SingleSourceInput): ) +class ClassifyInput(_SingleSourceInput): + """Input for the classify operation.""" + + model_config = ConfigDict(str_strip_whitespace=True, extra="forbid") + + task: str = Field( + ..., + description="Natural language instructions describing how to classify each row.", + min_length=1, + ) + categories: list[str] = Field( + ..., + description="Allowed category values (minimum 2). Each row will be assigned one of these.", + min_length=2, + ) + classification_field: str = Field( + default="classification", + description="Name of the output column that will contain the assigned category.", + ) + include_reasoning: bool = Field( + default=False, + description="If true, adds a 'reasoning' column with the agent's justification.", + ) + + class UploadDataInput(BaseModel): """Input for the upload_data tool.""" diff --git a/everyrow-mcp/src/everyrow_mcp/tools.py b/everyrow-mcp/src/everyrow_mcp/tools.py index c9572e07..9a7f8e97 100644 --- a/everyrow-mcp/src/everyrow_mcp/tools.py +++ b/everyrow-mcp/src/everyrow_mcp/tools.py @@ -16,6 +16,7 @@ from everyrow.generated.models.public_task_type import PublicTaskType from everyrow.ops import ( agent_map_async, + classify_async, create_table_artifact, dedupe_async, forecast_async, @@ -36,6 +37,7 @@ from everyrow_mcp.models import ( AgentInput, CancelInput, + ClassifyInput, DedupeInput, ForecastInput, HttpResultsInput, @@ -697,6 +699,87 @@ async def everyrow_forecast( ) +@mcp.tool( + name="everyrow_classify", + structured_output=False, + annotations=ToolAnnotations( + title="Classify Rows", + readOnlyHint=False, + destructiveHint=False, + idempotentHint=False, + openWorldHint=True, + ), +) +async def everyrow_classify( + params: ClassifyInput, ctx: 
EveryRowContext +) -> list[TextContent]: + """Classify each row of a dataset into one of the provided categories. + + Uses a two-phase approach: Phase 1 attempts fast batch classification with + web research, and Phase 2 follows up with deeper research on ambiguous rows. + Each row is assigned exactly one of the provided categories. + + Examples: + - "Classify each company by its primary industry sector" with categories ["Technology", "Finance", "Healthcare", "Energy"] + - "Is this company founder-led?" with categories ["yes", "no"] + - "Classify by Koppen climate zone" with categories ["tropical", "arid", "temperate", "continental", "polar"] + + Output columns added: the ``classification_field`` column (default: ``classification``) + containing the assigned category. Optionally a ``reasoning`` column if ``include_reasoning`` is true. + + This function submits the task and returns immediately with a task_id and session_url. + After receiving a result from this tool, share the session_url with the user. + Then immediately call everyrow_progress(task_id) to monitor. + Once the task is completed, call everyrow_results to save the output. 
+ """ + logger.info( + "everyrow_classify: task=%.80s categories=%s rows=%s", + params.task, + params.categories, + len(params.data) if params.data else "artifact", + ) + log_client_info(ctx, "everyrow_classify") + client = _get_client(ctx) + + _clear_task_state() + input_data = params._aid_or_dataframe + + async with create_session( + client=client, session_id=params.session_id, name=params.session_name + ) as session: + session_url = session.get_url() + session_id_str = str(session.session_id) + cohort_task = await classify_async( + task=params.task, + categories=params.categories, + session=session, + input=input_data, + classification_field=params.classification_field, + include_reasoning=params.include_reasoning, + ) + task_id = str(cohort_task.task_id) + total = len(input_data) if isinstance(input_data, pd.DataFrame) else 0 + write_initial_task_state( + task_id, + task_type=PublicTaskType.CLASSIFY, + session_url=session_url, + total=total, + input_source=params._input_data_mode.value, + ) + + return await create_tool_response( + task_id=task_id, + session_url=session_url, + label=f"Submitted: {total} rows for classification into {len(params.categories)} categories." + if total + else f"Submitted: artifact for classification into {len(params.categories)} categories.", + token=client.token, + total=total, + mcp_server_url=ctx.request_context.lifespan_context.mcp_server_url, + session_id=session_id_str, + ) + + @mcp.tool( name="everyrow_upload_data", structured_output=False, @@ -714,7 +797,7 @@ async def everyrow_upload_data( """Upload data from a URL or local file. Returns an artifact_id for use in processing tools. Use this tool to ingest data before calling everyrow_agent, everyrow_screen, - everyrow_rank, everyrow_dedupe, everyrow_merge, or everyrow_forecast. + everyrow_rank, everyrow_dedupe, everyrow_merge, everyrow_classify, or everyrow_forecast. 
Supported sources: - HTTP(S) URLs (including Google Sheets — auto-converted to CSV export) diff --git a/src/everyrow/ops.py b/src/everyrow/ops.py index 0f3ba2e0..e9b9283f 100644 --- a/src/everyrow/ops.py +++ b/src/everyrow/ops.py @@ -10,6 +10,7 @@ from everyrow.generated.api.artifacts import create_artifact_artifacts_post from everyrow.generated.api.operations import ( agent_map_operations_agent_map_post, + classify_operations_classify_post, dedupe_operations_dedupe_post, forecast_operations_forecast_post, merge_operations_merge_post, @@ -21,6 +22,8 @@ AgentMapOperation, AgentMapOperationInputType1Item, AgentMapOperationResponseSchemaType0, + ClassifyOperation, + ClassifyOperationInputType1Item, CreateArtifactRequest, CreateArtifactRequestDataType0Item, CreateArtifactRequestDataType1, @@ -853,3 +856,100 @@ async def forecast_async( ) cohort_task.set_submitted(response.task_id, response.session_id, session.client) return cohort_task + + +# --- Classify --- + + +async def classify( + task: str, + categories: list[str], + input: DataFrame | UUID | TableResult, + classification_field: str = "classification", + include_reasoning: bool = False, + session: Session | None = None, +) -> TableResult: + """Classify each row of a table into one of the provided categories. + + Uses a two-phase approach: Phase 1 attempts fast batch classification using + web research, and Phase 2 follows up with deeper research on ambiguous rows. + Each row is assigned exactly one of the provided categories. + + Args: + task: Natural-language instructions describing how to classify each row. + categories: Allowed category values (minimum 2). Each row will be + assigned exactly one of these. + input: The input table. Each row is classified independently. + classification_field: Name of the output column that will contain the + assigned category. Default: ``"classification"``. + include_reasoning: If True, adds a ``reasoning`` column with the + agent's justification for the classification. 
+ session: Optional session. If not provided, one will be created + automatically. + + Returns: + TableResult with a ``classification_field`` column (and optionally + ``reasoning``) added to each input row. + """ + if session is None: + async with create_session() as internal_session: + cohort_task = await classify_async( + task=task, + categories=categories, + session=internal_session, + input=input, + classification_field=classification_field, + include_reasoning=include_reasoning, + ) + result = await cohort_task.await_result(on_progress=print_progress) + if isinstance(result, TableResult): + return result + raise EveryrowError("Classify task did not return a table result") + cohort_task = await classify_async( + task=task, + categories=categories, + session=session, + input=input, + classification_field=classification_field, + include_reasoning=include_reasoning, + ) + result = await cohort_task.await_result(on_progress=print_progress) + if isinstance(result, TableResult): + return result + raise EveryrowError("Classify task did not return a table result") + + +async def classify_async( + task: str, + categories: list[str], + session: Session, + input: DataFrame | UUID | TableResult, + classification_field: str = "classification", + include_reasoning: bool = False, +) -> EveryrowTask[BaseModel]: + """Submit a classify task asynchronously. + + Returns: + EveryrowTask that resolves to a TableResult with a classification column. 
+ """ + input_data = _prepare_table_input(input, ClassifyOperationInputType1Item) + + body = ClassifyOperation( + input_=input_data, # type: ignore + task=task, + categories=categories, + session_id=session.session_id, + classification_field=classification_field, + include_reasoning=include_reasoning, + ) + + response = await classify_operations_classify_post.asyncio( + client=session.client, body=body + ) + response = handle_response(response) + + cohort_task: EveryrowTask[BaseModel] = EveryrowTask( + response_model=BaseModel, is_map=True, is_expand=False + ) + cohort_task.set_submitted(response.task_id, response.session_id, session.client) + return cohort_task diff --git a/tests/integration/test_classify.py b/tests/integration/test_classify.py new file mode 100644 index 00000000..688fd2ee --- /dev/null +++ b/tests/integration/test_classify.py @@ -0,0 +1,65 @@ +"""Integration tests for classify operation.""" + +import pandas as pd +import pytest + +from everyrow.ops import classify +from everyrow.result import TableResult + +pytestmark = [pytest.mark.integration, pytest.mark.asyncio] + + +async def test_classify_assigns_categories(): + """Test that classify returns a TableResult with correct categories.""" + input_df = pd.DataFrame( + [ + {"company": "Apple Inc.", "description": "Consumer electronics and software"}, + { + "company": "JPMorgan Chase", + "description": "Investment banking and financial services", + }, + { + "company": "ExxonMobil", + "description": "Oil and gas exploration and production", + }, + ] + ) + categories = ["Technology", "Finance", "Energy", "Healthcare"] + + result = await classify( + task="Classify each company by its primary industry sector", + categories=categories, + input=input_df, + ) + + assert isinstance(result, TableResult) + assert result.artifact_id is not None + assert "classification" in result.data.columns + assert len(result.data) == 3 + + for _, row in result.data.iterrows(): + assert row["classification"] in categories, 
( + f"Invalid classification '{row['classification']}' for {row.get('company')}" + ) + + +async def test_classify_custom_field_and_reasoning(): + """Test custom classification_field and include_reasoning.""" + input_df = pd.DataFrame( + [ + {"company": "Tesla", "description": "Electric vehicles and clean energy"}, + ] + ) + categories = ["Technology", "Automotive", "Energy"] + + result = await classify( + task="Classify each company by its primary industry sector", + categories=categories, + input=input_df, + classification_field="sector", + include_reasoning=True, + ) + + assert isinstance(result, TableResult) + assert "sector" in result.data.columns + assert result.data["sector"].iloc[0] in categories From 62c83507ce1e8d89c0e78d5da637f29feb9a249d Mon Sep 17 00:00:00 2001 From: Dan Schwarz Date: Thu, 26 Feb 2026 20:58:36 -0800 Subject: [PATCH 2/6] docs: add classify and forecast to all operation listings Update all documentation, MCP server README, skill reference, and manifest to include classify and forecast alongside existing operations. Rewrite CLASSIFY.md reference with real GICS sector example. Update tool description to describe research scaling rather than implementation phases. Point Classifiers link in README to CLASSIFY instead of SCREEN and replace screen code example with classify. 
Co-Authored-By: Claude Opus 4.6 --- README.md | 22 +++---- docs/api.md | 2 +- docs/case-studies.md | 10 +++- docs/getting-started.md | 16 ++--- docs/guides.md | 11 +++- docs/mcp-server.md | 23 +++++++ docs/reference/CLASSIFY.md | 41 ++++++++++--- everyrow-mcp/README.md | 27 ++++++++- everyrow-mcp/manifest.json | 2 +- everyrow-mcp/src/everyrow_mcp/tools.py | 3 +- skills/everyrow-sdk/SKILL.md | 83 +++++++++++++++++++++++++- 11 files changed, 205 insertions(+), 35 deletions(-) diff --git a/README.md b/README.md index 761275e9..96bce6e1 100644 --- a/README.md +++ b/README.md @@ -29,7 +29,7 @@ Spin up a team of: | [**Agents**](https://everyrow.io/docs/reference/RESEARCH) | Research, then analyze | 1–3¢/researcher | 10k rows | | [**Forecasters**](https://everyrow.io/docs/reference/FORECAST) | Predict outcomes | 20-50¢/researcher | 10k rows | | [**Scorers**](https://everyrow.io/docs/reference/RANK) | Research, then score | 1-5¢/researcher | 10k rows | -| [**Classifiers**](https://everyrow.io/docs/reference/SCREEN) | Research, then categorize | 0.1-0.7¢/researcher | 10k rows | +| [**Classifiers**](https://everyrow.io/docs/reference/CLASSIFY) | Research, then categorize | 0.1-0.7¢/researcher | 10k rows | | [**Matchers**](https://everyrow.io/docs/reference/MERGE) | Find matching rows | 0.2-0.5¢/researcher | 20k rows | See the full [API reference](https://everyrow.io/docs/api), [guides](https://everyrow.io/docs/guides), and [case studies](https://everyrow.io/docs/case-studies), (for example, see our [case study](https://everyrow.io/docs/case-studies/llm-web-research-agents-at-scale) running a `Research` task on 10k rows, running agents that used 120k LLM calls.) @@ -203,23 +203,23 @@ Requires Python 3.12+. 
Then you can use the SDK directly: ```python import asyncio import pandas as pd -from everyrow.ops import screen -from pydantic import BaseModel, Field +from everyrow.ops import classify companies = pd.DataFrame([ - {"company": "Airtable",}, {"company": "Vercel",}, {"company": "Notion",} + {"company": "Apple"}, {"company": "JPMorgan Chase"}, {"company": "ExxonMobil"}, + {"company": "Tesla"}, {"company": "Pfizer"}, {"company": "Duke Energy"}, ]) -class JobScreenResult(BaseModel): - qualifies: bool = Field(description="True if company lists jobs with all criteria") - async def main(): - result = await screen( - task="""Qualifies if: 1. Remote-friendly, 2. Senior, and 3. Discloses salary""", + result = await classify( + task="Classify this company by its GICS industry sector", + categories=["Energy", "Materials", "Industrials", "Consumer Discretionary", + "Consumer Staples", "Health Care", "Financials", + "Information Technology", "Communication Services", + "Utilities", "Real Estate"], input=companies, - response_model=JobScreenResult, ) - print(result.data.head()) + print(result.data[["company", "classification"]]) asyncio.run(main()) ``` diff --git a/docs/api.md b/docs/api.md index 841d1dca..0349dc20 100644 --- a/docs/api.md +++ b/docs/api.md @@ -65,7 +65,7 @@ result = await classify( ) ``` -`classify` assigns each row in a DataFrame to one of the provided categories using a two-phase approach: Phase 1 attempts fast batch classification with web research, and Phase 2 follows up with deeper research on ambiguous rows. Supports binary (yes/no) and multi-category classification with optional reasoning output. +`classify` assigns each row in a DataFrame to one of the provided categories using web research that scales to the difficulty of the classification. Supports binary (yes/no) and multi-category classification with optional reasoning output. 
[Full reference →](/docs/reference/CLASSIFY) diff --git a/docs/case-studies.md b/docs/case-studies.md index c09d02c5..ee98bf1f 100644 --- a/docs/case-studies.md +++ b/docs/case-studies.md @@ -1,12 +1,16 @@ --- title: Case Studies -description: Runnable case studies demonstrating everyrow operations on real datasets — screen, rank, dedupe, merge, and research with LLM-powered agents. +description: Runnable case studies demonstrating everyrow operations on real datasets — classify, screen, rank, dedupe, merge, forecast, and research with LLM-powered agents. --- # Case Studies Runnable case studies with real datasets. Each case study demonstrates an everyrow operation end-to-end with output you can inspect. +## Classify + +- [Classify and Label Data](/docs/classify-dataframe-rows-llm) + ## Screen - [Screen 10,000 Rows](/docs/case-studies/llm-powered-screening-at-scale) @@ -33,6 +37,10 @@ Runnable case studies with real datasets. Each case study demonstrates an everyr - [Link Records Across Medical Datasets](/docs/case-studies/match-clinical-trials-to-papers) - [Merge Costs and Speed](/docs/case-studies/understanding-costs-and-speed-for-merge) +## Forecast + +- [Automating Forecasting Questions](https://futuresearch.ai/automating-forecasting-questions/) + ## Multi-Method - [Multi-Stage Lead Qualification](/docs/case-studies/multi-stage-lead-qualification) diff --git a/docs/getting-started.md b/docs/getting-started.md index 1361ea62..c998186a 100644 --- a/docs/getting-started.md +++ b/docs/getting-started.md @@ -150,13 +150,15 @@ df = await fetch_task_data("12345678-1234-1234-1234-123456789abc") ## Operations -| Operation | Description | -| ------------------------------- | ------------------------------------------ | -| [Screen](/reference/SCREEN) | Filter rows by criteria requiring judgment | -| [Rank](/reference/RANK) | Score rows by qualitative factors | -| [Dedupe](/reference/DEDUPE) | Deduplicate when fuzzy matching fails | -| [Merge](/reference/MERGE) | Join 
tables when keys don't match exactly | -| [Research](/reference/RESEARCH) | Run web agents to research each row | +| Operation | Description | +| --------------------------------- | ------------------------------------------ | +| [Classify](/reference/CLASSIFY) | Categorize rows into predefined classes | +| [Screen](/reference/SCREEN) | Filter rows by criteria requiring judgment | +| [Rank](/reference/RANK) | Score rows by qualitative factors | +| [Dedupe](/reference/DEDUPE) | Deduplicate when fuzzy matching fails | +| [Merge](/reference/MERGE) | Join tables when keys don't match exactly | +| [Forecast](/reference/FORECAST) | Predict probabilities for binary questions | +| [Research](/reference/RESEARCH) | Run web agents to research each row | ## See Also diff --git a/docs/guides.md b/docs/guides.md index 9b513caf..7167b3d2 100644 --- a/docs/guides.md +++ b/docs/guides.md @@ -1,12 +1,16 @@ --- title: Guides -description: Step-by-step tutorials for using everyrow to screen, rank, dedupe, merge, and research data with LLM-powered agents. +description: Step-by-step tutorials for using everyrow to classify, screen, rank, dedupe, merge, forecast, and research data with LLM-powered agents. --- # Guides Practical walkthroughs that show you how to use everyrow for common data processing tasks. Each guide covers a single operation end-to-end with working code. 
+## Classify + +- [Classify and Label Rows](/docs/classify-dataframe-rows-llm) + ## Screen - [Filter a Dataset Intelligently](/docs/filter-dataframe-with-llm) @@ -25,8 +29,11 @@ Practical walkthroughs that show you how to use everyrow for common data process - [Join Tables Without Shared Keys](/docs/fuzzy-join-without-keys) +## Forecast + +- [Forecast Binary Questions](/docs/reference/FORECAST) + ## Research - [Add a Column via Web Research](/docs/add-column-web-lookup) -- [Classify and Label Rows](/docs/classify-dataframe-rows-llm) - [LLM-Powered Data Labeling](/docs/active-learning-llm-oracle) diff --git a/docs/mcp-server.md b/docs/mcp-server.md index 932aab75..658fcc79 100644 --- a/docs/mcp-server.md +++ b/docs/mcp-server.md @@ -65,6 +65,29 @@ Join two CSVs using intelligent entity matching (LEFT JOIN semantics). Returns `task_id` and `session_url`. Call `everyrow_progress` to monitor. +### everyrow_classify + +Classify each row into one of the provided categories. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `task` | string | Yes | Classification instructions. | +| `categories` | list[string] | Yes | Allowed categories (minimum 2). Each row is assigned exactly one. | +| `classification_field` | string | No | Output column name (default: `"classification"`). | +| `include_reasoning` | boolean | No | Include a reasoning column (default: false). | + +Returns `task_id` and `session_url`. Call `everyrow_progress` to monitor. + +### everyrow_forecast + +Forecast the probability of binary questions. + +| Parameter | Type | Required | Description | +|-----------|------|----------|-------------| +| `context` | string | No | Optional batch-level context for all questions. | + +Returns `task_id` and `session_url`. Call `everyrow_progress` to monitor. + ### everyrow_agent Run web research agents on each row. 
diff --git a/docs/reference/CLASSIFY.md b/docs/reference/CLASSIFY.md index 111773a8..e59ad62a 100644 --- a/docs/reference/CLASSIFY.md +++ b/docs/reference/CLASSIFY.md @@ -5,29 +5,56 @@ description: API reference for the EveryRow classify tool, which assigns each ro # Classify -`classify` takes a DataFrame and a list of allowed categories, then assigns each row to exactly one category. Uses a two-phase approach: Phase 1 attempts fast batch classification with web research, and Phase 2 follows up with deeper research on ambiguous rows. +`classify` takes a DataFrame and a list of allowed categories, then assigns each row to exactly one category using web research that scales to the difficulty of the classification. Supports binary (yes/no) and multi-category classification with optional reasoning output. ## Examples +### GICS sector classification + ```python from pandas import DataFrame from everyrow.ops import classify companies = DataFrame([ - {"company": "Apple Inc.", "description": "Consumer electronics and software"}, - {"company": "JPMorgan Chase", "description": "Investment banking and financial services"}, - {"company": "ExxonMobil", "description": "Oil and gas exploration and production"}, + {"company": "Apple"}, + {"company": "JPMorgan Chase"}, + {"company": "ExxonMobil"}, + {"company": "Pfizer"}, + {"company": "Procter & Gamble"}, + {"company": "Tesla"}, + {"company": "AT&T"}, + {"company": "Caterpillar"}, + {"company": "Duke Energy"}, + {"company": "Simon Property Group"}, ]) result = await classify( - task="Classify each company by its primary industry sector", - categories=["Technology", "Finance", "Healthcare", "Energy"], + task="Classify this company by its GICS industry sector", + categories=[ + "Energy", "Materials", "Industrials", "Consumer Discretionary", + "Consumer Staples", "Health Care", "Financials", + "Information Technology", "Communication Services", + "Utilities", "Real Estate", + ], input=companies, ) print(result.data[["company", 
"classification"]]) ``` -The output DataFrame contains the original columns plus the classification column (default name: `classification`). +Output: + +| company | classification | +|----------------------|------------------------| +| Apple | Information Technology | +| JPMorgan Chase | Financials | +| ExxonMobil | Energy | +| Pfizer | Health Care | +| Procter & Gamble | Consumer Staples | +| Tesla | Consumer Discretionary | +| AT&T | Communication Services | +| Caterpillar | Industrials | +| Duke Energy | Utilities | +| Simon Property Group | Real Estate | ### Binary classification diff --git a/everyrow-mcp/README.md b/everyrow-mcp/README.md index adfbe727..0fe18aad 100644 --- a/everyrow-mcp/README.md +++ b/everyrow-mcp/README.md @@ -2,7 +2,7 @@ MCP (Model Context Protocol) server for [everyrow](https://everyrow.io): agent ops at spreadsheet scale. -This server exposes everyrow's 5 core operations as MCP tools, allowing LLM applications to screen, rank, dedupe, merge, and run agents on CSV files. +This server exposes everyrow's core operations as MCP tools, allowing LLM applications to classify, screen, rank, dedupe, merge, forecast, and run agents on CSV files. **All tools operate on local CSV files.** Provide absolute file paths as input, and transformed results are written to new CSV files at your specified output path. @@ -116,6 +116,31 @@ Parameters: Example: Match software products (left, enriched) to parent companies (right, lookup): Photoshop -> Adobe +### everyrow_classify + +Classify each row into one of the provided categories. + +``` +Parameters: +- task: Natural language classification instructions +- categories: Allowed categories (minimum 2) +- classification_field: (optional) Output column name (default: "classification") +- include_reasoning: (optional) Include reasoning column (default: false) +``` + +Example: Classify companies by GICS sector with categories ["Energy", "Financials", "Information Technology", ...] 
+ +### everyrow_forecast + +Forecast the probability of binary questions. + +``` +Parameters: +- context: (optional) Batch-level context for all questions +``` + +Example: "Will the US Federal Reserve cut rates before July 2027?" + ### everyrow_agent Run web research agents on each row of a CSV. diff --git a/everyrow-mcp/manifest.json b/everyrow-mcp/manifest.json index 697e640f..e44867a5 100644 --- a/everyrow-mcp/manifest.json +++ b/everyrow-mcp/manifest.json @@ -101,7 +101,7 @@ "python": ">=3.12" } }, - "keywords": ["everyrow", "dataframe", "csv", "ai", "data-processing", "dedupe", "merge", "rank", "screen"], + "keywords": ["everyrow", "dataframe", "csv", "ai", "data-processing", "classify", "dedupe", "merge", "rank", "screen", "forecast"], "license": "MIT", "privacy_policies": ["https://futuresearch.ai/privacy/"] } diff --git a/everyrow-mcp/src/everyrow_mcp/tools.py b/everyrow-mcp/src/everyrow_mcp/tools.py index 9a7f8e97..3100bc38 100644 --- a/everyrow-mcp/src/everyrow_mcp/tools.py +++ b/everyrow-mcp/src/everyrow_mcp/tools.py @@ -715,8 +715,7 @@ async def everyrow_classify( ) -> list[TextContent]: """Classify each row of a dataset into one of the provided categories. - Uses a two-phase approach: Phase 1 attempts fast batch classification with - web research, and Phase 2 follows up with deeper research on ambiguous rows. + Uses web research that scales to the difficulty of the classification. Each row is assigned exactly one of the provided categories. Examples: diff --git a/skills/everyrow-sdk/SKILL.md b/skills/everyrow-sdk/SKILL.md index f5fd7b49..ab6f6873 100644 --- a/skills/everyrow-sdk/SKILL.md +++ b/skills/everyrow-sdk/SKILL.md @@ -12,10 +12,12 @@ everyrow gives Claude a research team for your data. 
Use this skill when writing > - GitHub: [github.com/futuresearch/everyrow-sdk](https://github.com/futuresearch/everyrow-sdk) **Operations:** +- Classify rows into predefined categories - Rank/score rows based on qualitative criteria - Deduplicate data using semantic understanding - Merge tables using AI-powered matching - Screen/filter rows based on research-intensive criteria +- Forecast probabilities for binary questions - Run AI agents over dataframe rows ## Installation @@ -28,7 +30,7 @@ pip install everyrow ### MCP Server (for Claude Code, Claude Desktop, Cursor, etc.) -If an MCP server is available (`everyrow_screen`, `everyrow_rank`, etc. tools), you can use it directly without writing Python code. The MCP server operates on local CSV files. +If an MCP server is available (`everyrow_classify`, `everyrow_screen`, `everyrow_rank`, etc. tools), you can use it directly without writing Python code. The MCP server operates on local CSV files. To install the MCP server, add to your MCP config: @@ -84,6 +86,16 @@ export EVERYROW_API_KEY= If you have the everyrow MCP server configured, these tools operate directly on CSV files. +### everyrow_classify +Classify each row into one of the provided categories. +``` +Parameters: +- task: Natural language classification instructions +- categories: Allowed categories (minimum 2) +- classification_field: (optional) Output column name (default: "classification") +- include_reasoning: (optional) Include reasoning column (default: false) +``` + ### everyrow_screen Filter CSV rows based on criteria that require judgment. ``` @@ -129,6 +141,13 @@ Parameters: - use_web_search: (optional) "auto" (default), "yes", or "no" ``` +### everyrow_forecast +Forecast the probability of binary questions. +``` +Parameters: +- context: (optional) Batch-level context for all questions +``` + ### everyrow_agent Run web research agents on each row of a CSV. 
``` @@ -244,6 +263,48 @@ print(result.data.head()) Parameters: `task`, `left_table`, `right_table`, `merge_on_left`, `merge_on_right`, `relationship_type`, `use_web_search`, `session` +### classify - Categorize rows + +Assign each row to one of the provided categories: + +```python +from everyrow.ops import classify + +result = await classify( + task="Classify this company by its GICS industry sector", + categories=["Energy", "Materials", "Industrials", "Consumer Discretionary", + "Consumer Staples", "Health Care", "Financials", + "Information Technology", "Communication Services", + "Utilities", "Real Estate"], + input=companies, +) +print(result.data[["company", "classification"]]) +``` + +**Binary classification** - for yes/no questions, use two categories: + +```python +result = await classify( + task="Is this company founder-led?", + categories=["yes", "no"], + input=companies, +) +``` + +**With reasoning** - understand why each row was classified: + +```python +result = await classify( + task="Classify each company by its primary industry sector", + categories=["Technology", "Finance", "Healthcare", "Energy"], + input=companies, + classification_field="sector", + include_reasoning=True, +) +``` + +Parameters: `task`, `categories`, `input`, `classification_field` (default: "classification"), `include_reasoning` (default: False), `session` + ### screen - Evaluate and filter rows Filter rows based on criteria that require research: @@ -284,6 +345,24 @@ result = await screen( Parameters: `task`, `input`, `response_model`, `session` +### forecast - Predict probabilities + +Produce calibrated probability estimates for binary questions: + +```python +from everyrow.ops import forecast + +result = await forecast( + input=DataFrame([ + {"question": "Will the US Federal Reserve cut rates by at least 25bp before July 1, 2027?", + "resolution_criteria": "Resolves YES if the Fed announces at least one rate cut of 25bp or more."}, + ]), +) +print(result.data[["question", 
"probability", "rationale"]]) +``` + +Parameters: `input`, `context`, `session` + ### single_agent - Single input task Run an AI agent on a single input: @@ -412,7 +491,7 @@ df = await fetch_task_data("12345678-1234-1234-1234-123456789abc") ## Everyrow Long-Running Operations (MCP) -Everyrow operations (screen, rank, dedupe, merge, agent) take 1-10+ minutes. +Everyrow operations (classify, screen, rank, dedupe, merge, forecast, agent) take 1-10+ minutes. All MCP tools use an async pattern: 1. Call the operation tool (e.g., `everyrow_agent(...)`) to get task_id and session_url From 57f97a3c818378cd82a5469bcabb483f16a49d82 Mon Sep 17 00:00:00 2001 From: Dan Schwarz Date: Fri, 27 Feb 2026 06:52:00 -0800 Subject: [PATCH 3/6] style: format test_classify.py Co-Authored-By: Claude Opus 4.6 --- tests/integration/test_classify.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/tests/integration/test_classify.py b/tests/integration/test_classify.py index 688fd2ee..10fc6ecf 100644 --- a/tests/integration/test_classify.py +++ b/tests/integration/test_classify.py @@ -13,7 +13,10 @@ async def test_classify_assigns_categories(): """Test that classify returns a TableResult with correct categories.""" input_df = pd.DataFrame( [ - {"company": "Apple Inc.", "description": "Consumer electronics and software"}, + { + "company": "Apple Inc.", + "description": "Consumer electronics and software", + }, { "company": "JPMorgan Chase", "description": "Investment banking and financial services", From 9265b4e101cfb36b8eb7b538b892c8a8799ef104 Mon Sep 17 00:00:00 2001 From: Dan Schwarz Date: Fri, 27 Feb 2026 06:53:02 -0800 Subject: [PATCH 4/6] fix: add everyrow_classify to test tool lists Co-Authored-By: Claude Opus 4.6 --- everyrow-mcp/tests/test_http_transport.py | 3 ++- everyrow-mcp/tests/test_mcp_e2e.py | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/everyrow-mcp/tests/test_http_transport.py b/everyrow-mcp/tests/test_http_transport.py index 
c87516c2..7f194d37 100644 --- a/everyrow-mcp/tests/test_http_transport.py +++ b/everyrow-mcp/tests/test_http_transport.py @@ -207,13 +207,14 @@ async def test_health_endpoint(self, http_client: httpx.AsyncClient): assert r.json() == {"status": "ok"} async def test_list_tools(self, mcp_server: str): - """session.list_tools() returns all 8 registered tools.""" + """session.list_tools() returns all registered tools.""" async with open_mcp_session(mcp_server) as session: resp = await session.list_tools() tool_names = sorted(t.name for t in resp.tools) expected = sorted( [ "everyrow_agent", + "everyrow_classify", "everyrow_single_agent", "everyrow_rank", "everyrow_screen", diff --git a/everyrow-mcp/tests/test_mcp_e2e.py b/everyrow-mcp/tests/test_mcp_e2e.py index 7eaeb2cc..a6d32b33 100644 --- a/everyrow-mcp/tests/test_mcp_e2e.py +++ b/everyrow-mcp/tests/test_mcp_e2e.py @@ -175,6 +175,7 @@ async def test_list_tools(self, _http_state): "everyrow_agent", "everyrow_balance", "everyrow_cancel", + "everyrow_classify", "everyrow_dedupe", "everyrow_forecast", "everyrow_list_sessions", From 467caffa0c9baa73bd5986d2295a9b687130c541 Mon Sep 17 00:00:00 2001 From: Dan Schwarz Date: Fri, 27 Feb 2026 06:54:59 -0800 Subject: [PATCH 5/6] chore: bump plugin version to 0.4.1 for skill changes Co-Authored-By: Claude Opus 4.6 --- .claude-plugin/plugin.json | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.claude-plugin/plugin.json b/.claude-plugin/plugin.json index f5c2e2a2..9d3419e3 100644 --- a/.claude-plugin/plugin.json +++ b/.claude-plugin/plugin.json @@ -1,7 +1,7 @@ { "name": "everyrow", "description": "Give Claude Code a research team. 
Forecast, score, classify, or research every row of a dataset.", - "version": "0.4.0", + "version": "0.4.1", "author": { "name": "FutureSearch" }, From ca96fd2dc9adb30aba2e209523c54054bfaccf22 Mon Sep 17 00:00:00 2001 From: Dan Schwarz Date: Fri, 27 Feb 2026 06:57:35 -0800 Subject: [PATCH 6/6] chore: bump all version files to 0.4.1 Co-Authored-By: Claude Opus 4.6 --- .claude-plugin/marketplace.json | 2 +- everyrow-mcp/manifest.json | 2 +- everyrow-mcp/pyproject.toml | 2 +- everyrow-mcp/server.json | 4 ++-- gemini-extension.json | 2 +- pyproject.toml | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) diff --git a/.claude-plugin/marketplace.json b/.claude-plugin/marketplace.json index 2b3c495d..ec7956cc 100644 --- a/.claude-plugin/marketplace.json +++ b/.claude-plugin/marketplace.json @@ -11,7 +11,7 @@ "name": "everyrow", "source": "./", "description": "Give Claude Code a research team. Forecast, score, classify, or research every row of a dataset.", - "version": "0.4.0" + "version": "0.4.1" } ] } diff --git a/everyrow-mcp/manifest.json b/everyrow-mcp/manifest.json index e44867a5..481a47d4 100644 --- a/everyrow-mcp/manifest.json +++ b/everyrow-mcp/manifest.json @@ -2,7 +2,7 @@ "manifest_version": "0.4", "name": "everyrow-mcp", "display_name": "Everyrow MCP Server", - "version": "0.4.0", + "version": "0.4.1", "description": "Give your AI a research team. Forecast, score, classify, or research every row of a dataset.", "long_description": "MCP server for everyrow: give your AI a research team. 
Each operation dispatches web research agents across a dataset to forecast, score, classify, deduplicate, merge, or research at scale.", "author": { diff --git a/everyrow-mcp/pyproject.toml b/everyrow-mcp/pyproject.toml index 60a87dd6..87a72b70 100644 --- a/everyrow-mcp/pyproject.toml +++ b/everyrow-mcp/pyproject.toml @@ -1,6 +1,6 @@ [project] name = "everyrow-mcp" -version = "0.4.0" +version = "0.4.1" description = "MCP server for everyrow: a researcher for every row" readme = "README.md" requires-python = ">=3.12" diff --git a/everyrow-mcp/server.json b/everyrow-mcp/server.json index 7605a8ac..3989200b 100644 --- a/everyrow-mcp/server.json +++ b/everyrow-mcp/server.json @@ -9,13 +9,13 @@ "subfolder": "everyrow-mcp" }, "websiteUrl": "https://github.com/futuresearch/everyrow-sdk/tree/main/everyrow-mcp", - "version": "0.4.0", + "version": "0.4.1", "packages": [ { "registryType": "pypi", "registryBaseUrl": "https://pypi.org", "identifier": "everyrow-mcp", - "version": "0.4.0", + "version": "0.4.1", "runtimeHint": "uvx", "transport": { "type": "stdio" diff --git a/gemini-extension.json b/gemini-extension.json index 6c00536e..a1803c4d 100644 --- a/gemini-extension.json +++ b/gemini-extension.json @@ -1,7 +1,7 @@ { "name": "everyrow", "description": "Gemini extension for the everyrow SDK - AI-powered data processing utilities for transforming, deduping, merging, ranking, and screening dataframes", - "version": "0.4.0", + "version": "0.4.1", "mcpServers": { "everyrow": { "command": "uvx", diff --git a/pyproject.toml b/pyproject.toml index 3e9efb51..43444d0b 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -6,7 +6,7 @@ everyrow = { workspace = true } [project] name = "everyrow" -version = "0.4.0" +version = "0.4.1" description = "A researcher for every row. Forecast, score, classify, or research entire datasets." readme = "README.md" requires-python = ">=3.12"