diff --git a/docs-site/scripts/check-links.py b/docs-site/scripts/check-links.py index dcd2f02d..5f88bf0d 100644 --- a/docs-site/scripts/check-links.py +++ b/docs-site/scripts/check-links.py @@ -71,6 +71,7 @@ "https://www.kaggle.com/code/rafaelpoyiadzi/active-learning-with-an-llm-oracle", "https://www.kaggle.com/datasets/tunguz/pubmed-title-abstracts-2019-baseline", "https://arxiv.org/abs/2506.21558", + "https://media.githubusercontent.com/media/futuresearch/everyrow-sdk/refs/heads/main/docs/data/fda_products.csv" } diff --git a/docs/mcp-server.md b/docs/mcp-server.md index 932aab75..1f53d6eb 100644 --- a/docs/mcp-server.md +++ b/docs/mcp-server.md @@ -60,7 +60,7 @@ Join two CSVs using intelligent entity matching (LEFT JOIN semantics). | `right_csv` | string | Yes | The lookup/reference table — its columns are appended to matches; unmatched left rows get nulls. | | `merge_on_left` | string | No | Only set if you expect exact string matches on this column or want to draw agent attention to it. Fine to omit. | | `merge_on_right` | string | No | Only set if you expect exact string matches on this column or want to draw agent attention to it. Fine to omit. | -| `relationship_type` | string | No | `many_to_one` (default) — multiple left rows can match one right row. `one_to_one` — only when both tables have unique entities of the same kind. | +| `relationship_type` | string | No | `many_to_one` (default) — multiple left rows can match one right row. `one_to_one` — unique matching between left and right rows. `one_to_many` — one left row can match multiple right rows. `many_to_many` — multiple left rows can match multiple right rows. For `one_to_many` and `many_to_many`, multiple matches are joined with `" \| "` in each added column. | | `use_web_search` | string | No | `auto` (default), `yes`, or `no`. | Returns `task_id` and `session_url`. Call `everyrow_progress` to monitor. 
diff --git a/docs/reference/MERGE.md b/docs/reference/MERGE.md index 79eddede..167a72f3 100644 --- a/docs/reference/MERGE.md +++ b/docs/reference/MERGE.md @@ -57,7 +57,7 @@ A DataFrame with all left table columns plus matched right table columns. Rows t | `right_table` | DataFrame | The lookup/reference table — its columns are appended to matches; unmatched left rows get nulls. | | `merge_on_left` | Optional[str] | Only set if you expect exact string matches on this column or want to draw agent attention to it. Auto-detected if omitted. | | `merge_on_right` | Optional[str] | Only set if you expect exact string matches on this column or want to draw agent attention to it. Auto-detected if omitted. | -| `relationship_type` | Optional[str] | `"many_to_one"` (default) — multiple left rows can match one right row. `"one_to_one"` — only when both tables have unique entities of the same kind. | +| `relationship_type` | Optional[str] | `"many_to_one"` (default) — multiple left rows can match one right row. `"one_to_one"` — unique matching between left and right rows. `"one_to_many"` — one left row can match multiple right rows. `"many_to_many"` — multiple left rows can match multiple right rows. For `one_to_many` and `many_to_many`, multiple matches are joined with `" \| "` in each added column. | | `use_web_search` | Optional[str] | `"auto"` (default), `"yes"`, or `"no"`. Controls whether agents use web search to resolve matches. | | `session` | Session | Optional, auto-created if omitted | diff --git a/everyrow-mcp/README.md b/everyrow-mcp/README.md index adfbe727..ce6104cc 100644 --- a/everyrow-mcp/README.md +++ b/everyrow-mcp/README.md @@ -111,7 +111,7 @@ Parameters: - merge_on_left: (optional) Only set if you expect exact string matches on this column or want to draw agent attention to it. Fine to omit. - merge_on_right: (optional) Only set if you expect exact string matches on this column or want to draw agent attention to it. Fine to omit. 
- use_web_search: (optional) "auto" (default), "yes", or "no" -- relationship_type: (optional) "many_to_one" (default) — multiple left rows can match one right row. "one_to_one" — only when both tables have unique entities of the same kind. +- relationship_type: (optional) "many_to_one" (default) — multiple left rows can match one right row. "one_to_one" — unique matching between left and right rows. "one_to_many" — one left row can match multiple right rows. "many_to_many" — multiple left rows can match multiple right rows. For one_to_many and many_to_many, multiple matches are joined with " | " in each added column. ``` Example: Match software products (left, enriched) to parent companies (right, lookup): Photoshop -> Adobe diff --git a/everyrow-mcp/src/everyrow_mcp/models.py b/everyrow-mcp/src/everyrow_mcp/models.py index 468f71a2..98ccb4e0 100644 --- a/everyrow-mcp/src/everyrow_mcp/models.py +++ b/everyrow-mcp/src/everyrow_mcp/models.py @@ -354,20 +354,22 @@ class MergeInput(BaseModel): merge_on_left: str | None = Field( default=None, - description="Column name in the left table to match on.", + description="Only set if you expect some exact string matches on the chosen column or want to draw special attention of LLM agents to this particular column. Fine to leave unspecified in all other cases.", ) merge_on_right: str | None = Field( default=None, - description="Column name in the right table to match on.", + description="Only set if you expect some exact string matches on the chosen column or want to draw special attention of LLM agents to this particular column. 
Fine to leave unspecified in all other cases.", ) use_web_search: Literal["auto", "yes", "no"] | None = Field( default=None, description='Control web search: "auto", "yes", or "no".', ) - relationship_type: Literal["many_to_one", "one_to_one"] | None = Field( + relationship_type: ( + Literal["many_to_one", "one_to_one", "one_to_many", "many_to_many"] | None + ) = Field( default=None, - description="Relationship type: many_to_one (default) or one_to_one.", + description='Control merge relationship type / cardinality between the two tables: "many_to_one" (default) allows multiple left rows to match one right row (e.g. matching reviews to product), "one_to_one" enforces unique matching between left and right rows (e.g. CEO to company), "one_to_many" allows one left row to match multiple right rows (e.g. company to products), "many_to_many" allows multiple left rows to match multiple right rows (e.g. companies to investors). For one_to_many and many_to_many, multiple matches are represented by joining the right-table values with " | " in each added column.', ) session_id: str | None = Field( diff --git a/everyrow-mcp/src/everyrow_mcp/tools.py b/everyrow-mcp/src/everyrow_mcp/tools.py index c9572e07..61be5479 100644 --- a/everyrow-mcp/src/everyrow_mcp/tools.py +++ b/everyrow-mcp/src/everyrow_mcp/tools.py @@ -544,7 +544,8 @@ async def everyrow_merge(params: MergeInput, ctx: EveryRowContext) -> list[TextC - merge_on_left/merge_on_right: only set if you expect exact string matches on the chosen columns or want to draw agent attention to them. Fine to omit. - relationship_type: defaults to many_to_one, which is correct in most cases. - Only set one_to_one when both tables have unique entities of the same kind. + For one_to_many and many_to_many, multiple right-table matches are joined + with " | " in each added column. 
Examples: - Match software products (left, enriched) to parent companies (right, lookup): @@ -553,6 +554,12 @@ async def everyrow_merge(params: MergeInput, ctx: EveryRowContext) -> list[TextC Genentech -> Roche. relationship_type: many_to_one. - Join two contact lists with different name formats: relationship_type: one_to_one (each person appears once in each list). + - Match a company (left) to its products (right): + relationship_type: one_to_many (one company has many products; + matched product names joined with " | "). + - Match companies (left) to investors (right): + relationship_type: many_to_many (companies share investors and vice versa; + matched values joined with " | "). This function submits the task and returns immediately with a task_id and session_url. After receiving a result from this tool, share the session_url with the user. diff --git a/skills/everyrow-sdk/SKILL.md b/skills/everyrow-sdk/SKILL.md index f5fd7b49..6d527f7a 100644 --- a/skills/everyrow-sdk/SKILL.md +++ b/skills/everyrow-sdk/SKILL.md @@ -125,7 +125,7 @@ Parameters: - output_path: Directory or full .csv path for output - merge_on_left: (optional) Only set if you expect exact string matches on the chosen column or want to draw agent attention to it. Fine to omit. - merge_on_right: (optional) Only set if you expect exact string matches on the chosen column or want to draw agent attention to it. Fine to omit. -- relationship_type: (optional) Defaults to "many_to_one", which is correct in most cases (e.g. products → companies). Only set "one_to_one" when both tables have unique entities of the same kind. +- relationship_type: (optional) Defaults to "many_to_one", which is correct in most cases (e.g. products → companies). Use "one_to_one" when both tables have unique entities of the same kind. Use "one_to_many" when one left row can match multiple right rows (e.g. company → products). Use "many_to_many" when multiple left rows can match multiple right rows (e.g. companies → investors). 
For one_to_many and many_to_many, multiple matches are joined with " | " in each added column. - use_web_search: (optional) "auto" (default), "yes", or "no" ``` diff --git a/src/everyrow/generated/models/merge_operation.py b/src/everyrow/generated/models/merge_operation.py index 12612905..729b6c98 100644 --- a/src/everyrow/generated/models/merge_operation.py +++ b/src/everyrow/generated/models/merge_operation.py @@ -37,7 +37,10 @@ class MergeOperation: without initial LLM merge Default: MergeOperationUseWebSearchType0.AUTO. relationship_type (MergeOperationRelationshipTypeType0 | None | Unset): Control merge relationship behavior: 'many_to_one' (default) allows multiple left rows to match the same right row, 'one_to_one' enforces unique - matches and resolves clashes Default: MergeOperationRelationshipTypeType0.MANY_TO_ONE. + matches and resolves clashes, 'one_to_many' allows one left row to match multiple right rows, + 'many_to_many' allows multiple left rows to match multiple right rows. For one_to_many and many_to_many, + multiple matches are joined with " | " in each added column. Default: + MergeOperationRelationshipTypeType0.MANY_TO_ONE. session_id (None | Unset | UUID): Session ID. If not provided, a new session is auto-created for this task. webhook_url (None | str | Unset): Optional URL to receive a POST callback when the task completes or fails. 
""" diff --git a/src/everyrow/generated/models/merge_operation_relationship_type_type_0.py b/src/everyrow/generated/models/merge_operation_relationship_type_type_0.py index 6720f75f..37d14bbf 100644 --- a/src/everyrow/generated/models/merge_operation_relationship_type_type_0.py +++ b/src/everyrow/generated/models/merge_operation_relationship_type_type_0.py @@ -4,6 +4,8 @@ class MergeOperationRelationshipTypeType0(str, Enum): MANY_TO_ONE = "many_to_one" ONE_TO_ONE = "one_to_one" + ONE_TO_MANY = "one_to_many" + MANY_TO_MANY = "many_to_many" def __str__(self) -> str: return str(self.value) diff --git a/src/everyrow/ops.py b/src/everyrow/ops.py index 0f3ba2e0..4941d327 100644 --- a/src/everyrow/ops.py +++ b/src/everyrow/ops.py @@ -583,7 +583,10 @@ async def merge( merge_on_left: str | None = None, merge_on_right: str | None = None, use_web_search: Literal["auto", "yes", "no"] | None = None, - relationship_type: Literal["many_to_one", "one_to_one"] | None = None, + relationship_type: Literal[ + "many_to_one", "one_to_one", "one_to_many", "many_to_many" + ] + | None = None, ) -> MergeResult: """Merge two tables using AI (LEFT JOIN semantics). @@ -595,7 +598,7 @@ async def merge( merge_on_left: Only set if you expect exact string matches on this column or want to draw agent attention to it. Auto-detected if omitted. merge_on_right: Only set if you expect exact string matches on this column or want to draw agent attention to it. Auto-detected if omitted. use_web_search: Control web search behavior: "auto" (default) tries LLM merge first then conditionally searches, "no" skips web search entirely, "yes" forces web search on every row. - relationship_type: Defaults to "many_to_one", which is correct in most cases (multiple left rows can match one right row, e.g. products → companies). Only use "one_to_one" when both tables have unique entities of the same kind. 
+ relationship_type: Control merge relationship type / cardinality between the two tables: "many_to_one" (default) allows multiple left rows to match one right row (e.g. matching reviews to product), "one_to_one" enforces unique matching between left and right rows (e.g. CEO to company), "one_to_many" allows one left row to match multiple right rows (e.g. company to products), "many_to_many" allows multiple left rows to match multiple right rows (e.g. companies to investors). For one_to_many and many_to_many, multiple matches are represented by joining the right-table values with " | " in each added column. Returns: MergeResult containing the merged table and match breakdown by method (exact, fuzzy, llm, web) @@ -642,7 +645,10 @@ async def merge_async( merge_on_left: str | None = None, merge_on_right: str | None = None, use_web_search: Literal["auto", "yes", "no"] | None = None, - relationship_type: Literal["many_to_one", "one_to_one"] | None = None, + relationship_type: Literal[ + "many_to_one", "one_to_one", "one_to_many", "many_to_many" + ] + | None = None, ) -> MergeTask: """Submit a merge task asynchronously.