From 7e180d290450a6826caaa9b5478c3df94dedb48b Mon Sep 17 00:00:00 2001 From: AMATH <116212274+amathxbt@users.noreply.github.com> Date: Tue, 24 Mar 2026 00:44:55 +0100 Subject: [PATCH 1/4] fix(tee_registry): randomize TEE selection to improve load distribution Previously get_llm_tee() always returned tees[0], the first TEE in the registry list. This caused all clients to hit the same TEE, providing no load distribution and no resilience when that TEE starts failing. Fix: use random.choice(tees) so each LLM() construction independently picks from all currently active TEEs. Successive retries or re-initializations will therefore naturally spread across the healthy pool. Closes #200 --- src/opengradient/client/tee_registry.py | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) diff --git a/src/opengradient/client/tee_registry.py b/src/opengradient/client/tee_registry.py index 9ad3cfd..b791a38 100644 --- a/src/opengradient/client/tee_registry.py +++ b/src/opengradient/client/tee_registry.py @@ -1,6 +1,7 @@ """TEE Registry client for fetching verified TEE endpoints and TLS certificates.""" import logging +import random import ssl from dataclasses import dataclass from typing import List, NamedTuple, Optional @@ -109,17 +110,32 @@ def get_active_tees_by_type(self, tee_type: int) -> List[TEEEndpoint]: def get_llm_tee(self) -> Optional[TEEEndpoint]: """ - Return the first active LLM proxy TEE from the registry. + Return a randomly selected active LLM proxy TEE from the registry. + + Randomizing the selection distributes load across all healthy TEEs and + avoids repeatedly routing to the same TEE when it starts failing + (addresses issue #200 — improve TEE selection/retry logic). Returns: - TEEEndpoint for an active LLM proxy TEE, or None if none are available. + TEEEndpoint for a randomly chosen active LLM proxy TEE, or None if + none are available. 
""" tees = self.get_active_tees_by_type(TEE_TYPE_LLM_PROXY) if not tees: logger.warning("No active LLM proxy TEEs found in registry") return None - return tees[0] + # Randomly select from all active TEEs to distribute load and improve + # resilience — if one TEE is failing, successive LLM() constructions + # will eventually land on a healthy one. + selected = random.choice(tees) + logger.debug( + "Selected TEE %s (endpoint=%s) from %d active LLM proxy TEE(s)", + selected.tee_id, + selected.endpoint, + len(tees), + ) + return selected def build_ssl_context_from_der(der_cert: bytes) -> ssl.SSLContext: From cddb41354a063be79df701c3b7aaae6bdb99fdbe Mon Sep 17 00:00:00 2001 From: AMATH <116212274+amathxbt@users.noreply.github.com> Date: Tue, 24 Mar 2026 00:46:40 +0100 Subject: [PATCH 2/4] fix(llm): surface HTTP 402 as actionable error with OPG approval hint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously any HTTP error from the TEE (including 402 Payment Required) was silently wrapped into a generic RuntimeError("TEE LLM chat failed: ..."). This caused the confusing traceback seen in issue #188 — the real cause (insufficient OPG allowance) was buried inside the exception message. Fix: - Add `import httpx` and intercept httpx.HTTPStatusError before the generic `except Exception` handler in both _chat_request() and completion(). - When status == 402, raise a RuntimeError with a clear, actionable hint telling the user to call llm.ensure_opg_approval(opg_amount=). - Also handle 402 in _parse_sse_response() for the streaming path. - All other HTTP errors continue to surface as before. 
Closes #188 --- src/opengradient/client/llm.py | 29 ++++++++++++++++++++++++----- 1 file changed, 24 insertions(+), 5 deletions(-) diff --git a/src/opengradient/client/llm.py b/src/opengradient/client/llm.py index eecfa14..d66bf00 100644 --- a/src/opengradient/client/llm.py +++ b/src/opengradient/client/llm.py @@ -1,11 +1,12 @@ """LLM chat and completion via TEE-verified execution with x402 payments.""" -import json +import json as _json import logging import ssl from dataclasses import dataclass from typing import AsyncGenerator, Dict, List, Optional, Union +import httpx from eth_account import Account from eth_account.account import LocalAccount from x402 import x402Client @@ -31,6 +32,12 @@ _COMPLETION_ENDPOINT = "/v1/completions" _REQUEST_TIMEOUT = 60 +_402_HINT = ( + "Payment required (HTTP 402): your wallet may have insufficient OPG token allowance. " + "Call llm.ensure_opg_approval(opg_amount=<amount>) to approve Permit2 spending " + "before making requests. Minimum amount is 0.05 OPG." +) + @dataclass class _ChatParams: @@ -267,7 +274,7 @@ async def completion( ) response.raise_for_status() raw_body = await response.aread() - result = json.loads(raw_body.decode()) + result = _json.loads(raw_body.decode()) return TextGenerationOutput( transaction_hash="external", completion_output=result.get("completion"), @@ -277,6 +284,10 @@ async def completion( ) except RuntimeError: raise + except httpx.HTTPStatusError as e: + if e.response.status_code == 402: + raise RuntimeError(_402_HINT) from e + raise RuntimeError(f"TEE LLM completion failed: {e}") from e except Exception as e: raise RuntimeError(f"TEE LLM completion failed: {e}") from e @@ -354,7 +365,7 @@ async def _chat_request(self, params: _ChatParams, messages: List[Dict]) -> Text ) response.raise_for_status() raw_body = await response.aread() - result = json.loads(raw_body.decode()) + result = _json.loads(raw_body.decode()) choices = result.get("choices") if not choices: @@ -377,6 +388,12 @@ async def 
_chat_request(self, params: _ChatParams, messages: List[Dict]) -> Text ) except RuntimeError: raise + except httpx.HTTPStatusError as e: + # Provide an actionable error message for the very common 402 case + # (issue #188 — users see a cryptic RuntimeError instead of guidance). + if e.response.status_code == 402: + raise RuntimeError(_402_HINT) from e + raise RuntimeError(f"TEE LLM chat failed: {e}") from e except Exception as e: raise RuntimeError(f"TEE LLM chat failed: {e}") from e @@ -425,6 +442,8 @@ async def _parse_sse_response(self, response) -> AsyncGenerator[StreamChunk, Non status_code = getattr(response, "status_code", None) if status_code is not None and status_code >= 400: body = await response.aread() + if status_code == 402: + raise RuntimeError(_402_HINT) raise RuntimeError(f"TEE LLM streaming request failed with status {status_code}: {body.decode('utf-8', errors='replace')}") buffer = b"" @@ -452,8 +471,8 @@ async def _parse_sse_response(self, response) -> AsyncGenerator[StreamChunk, Non return try: - data = json.loads(data_str) - except json.JSONDecodeError: + data = _json.loads(data_str) + except _json.JSONDecodeError: continue chunk = StreamChunk.from_sse_data(data) From d345fdde234b883382b735c778ebccf99601477a Mon Sep 17 00:00:00 2001 From: AMATH <116212274+amathxbt@users.noreply.github.com> Date: Tue, 24 Mar 2026 00:47:43 +0100 Subject: [PATCH 3/4] fix(_conversions): return int for zero-decimal fixed-point values MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Previously convert_to_float32() always returned np.float32 regardless of the decimals field, forcing callers to manually cast integer results (issue #103 — add proper type conversions from Solidity contract to Python). Fix: - Add convert_fixed_point_to_python(value, decimals) that returns int when decimals == 0 and np.float32 otherwise. np.array() automatically picks the correct dtype (int64 vs float32) based on the element types. 
- Update both convert_to_model_output() and convert_array_to_model_output() to call the new function. - Keep convert_to_float32() as a deprecated backward-compatible alias so any external code that imports it directly continues to work. Partially closes #103 --- src/opengradient/client/_conversions.py | 49 +++++++++++++++++++------ 1 file changed, 37 insertions(+), 12 deletions(-) diff --git a/src/opengradient/client/_conversions.py b/src/opengradient/client/_conversions.py index 495f663..1b9e753 100644 --- a/src/opengradient/client/_conversions.py +++ b/src/opengradient/client/_conversions.py @@ -3,7 +3,7 @@ import json import logging from decimal import Decimal -from typing import Dict, List, Tuple +from typing import Dict, List, Tuple, Union import numpy as np from web3.datastructures import AttributeDict @@ -36,11 +36,34 @@ def convert_to_fixed_point(number: float) -> Tuple[int, int]: return value, decimals +def convert_fixed_point_to_python(value: int, decimals: int) -> Union[int, np.float32]: + """ + Converts a fixed-point representation back to a native Python/NumPy type. + + Returns int when decimals == 0 (preserving integer semantics for + tensors that were originally integers — fixes issue #103 where callers + expecting int results received np.float32 and had to cast manually). + Returns np.float32 for all other cases. + + Args: + value: The integer significand stored on-chain. + decimals: The scale factor exponent (value / 10**decimals). + + Returns: + int if decimals == 0, np.float32 otherwise. + """ + if decimals == 0: + return int(value) + return np.float32(Decimal(value) / (10 ** Decimal(decimals))) + + def convert_to_float32(value: int, decimals: int) -> np.float32: """ - Converts fixed point back into floating point + Deprecated: use convert_fixed_point_to_python() instead. - Returns an np.float32 type + Kept for backwards compatibility — always returns np.float32 regardless + of the decimals value. 
New callers should use convert_fixed_point_to_python + which correctly returns int when decimals == 0. """ return np.float32(Decimal(value) / (10 ** Decimal(decimals))) @@ -61,11 +84,11 @@ def convert_to_model_input(inputs: Dict[str, np.ndarray]) -> Tuple[List[Tuple[st for tensor_name, tensor_data in inputs.items(): # Convert to NP array if list or single object if isinstance(tensor_data, list): - logging.debug(f"\tConverting {tensor_data} to np array") + logging.debug(f" Converting {tensor_data} to np array") tensor_data = np.array(tensor_data) if isinstance(tensor_data, (str, int, float)): - logging.debug(f"\tConverting single entry {tensor_data} to a list") + logging.debug(f" Converting single entry {tensor_data} to a list") tensor_data = np.array([tensor_data]) # Check if type is np array @@ -84,7 +107,7 @@ def convert_to_model_input(inputs: Dict[str, np.ndarray]) -> Tuple[List[Tuple[st converted_tensor_data = np.array([convert_to_fixed_point(i) for i in flat_data], dtype=data_type) input = (tensor_name, converted_tensor_data.tolist(), shape) - logging.debug("\tFloating tensor input: %s", input) + logging.debug(" Floating tensor input: %s", input) number_tensors.append(input) elif issubclass(tensor_data.dtype.type, np.integer): @@ -93,13 +116,13 @@ def convert_to_model_input(inputs: Dict[str, np.ndarray]) -> Tuple[List[Tuple[st converted_tensor_data = np.array([convert_to_fixed_point(int(i)) for i in flat_data], dtype=data_type) input = (tensor_name, converted_tensor_data.tolist(), shape) - logging.debug("\tInteger tensor input: %s", input) + logging.debug(" Integer tensor input: %s", input) number_tensors.append(input) elif issubclass(tensor_data.dtype.type, np.str_): # TODO (Kyle): Add shape into here as well input = (tensor_name, [s for s in flat_data]) - logging.debug("\tString tensor input: %s", input) + logging.debug(" String tensor input: %s", input) string_tensors.append(input) else: @@ -131,10 +154,11 @@ def convert_to_model_output(event_data: 
AttributeDict) -> Dict[str, np.ndarray]: name = tensor.get("name") shape = tensor.get("shape") values = [] - # Convert from fixed point back into np.float32 + # Use convert_fixed_point_to_python so integer tensors (decimals==0) + # come back as int instead of np.float32 (fixes issue #103). for v in tensor.get("values", []): if isinstance(v, (AttributeDict, dict)): - values.append(convert_to_float32(value=int(v.get("value")), decimals=int(v.get("decimals")))) + values.append(convert_fixed_point_to_python(value=int(v.get("value")), decimals=int(v.get("decimals")))) else: logging.warning(f"Unexpected number type: {type(v)}") output_dict[name] = np.array(values).reshape(shape) @@ -183,10 +207,11 @@ def convert_array_to_model_output(array_data: List) -> ModelOutput: values = tensor[1] shape = tensor[2] - # Convert from fixed point into np.float32 + # Use convert_fixed_point_to_python so integer tensors (decimals==0) + # come back as int instead of np.float32 (fixes issue #103). converted_values = [] for value in values: - converted_values.append(convert_to_float32(value=value[0], decimals=value[1])) + converted_values.append(convert_fixed_point_to_python(value=value[0], decimals=value[1])) number_data[name] = np.array(converted_values).reshape(shape) From 72a6279468c24b795ad7f76061d17d66fc536642 Mon Sep 17 00:00:00 2001 From: AMATH <116212274+amathxbt@users.noreply.github.com> Date: Tue, 24 Mar 2026 00:49:18 +0100 Subject: [PATCH 4/4] fix(alpha): guard against None from inference node + use estimate_gas in run_workflow MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Bug 4 — None crash in infer(): _get_inference_result_from_node() can legitimately return None when the inference node has no result yet. Previously this None was passed directly to convert_to_model_output(), which calls event_data.get(), causing an AttributeError: 'NoneType' object has no attribute 'get'. 
Fix: after calling _get_inference_result_from_node(), check for None and raise a clear RuntimeError with a human-readable message and the inference_id so callers know what to retry. Also guard the precompile log fetch: if parsed_logs is empty, raise RuntimeError instead of crashing with an IndexError on parsed_logs[0]. Bug 5 — hardcoded 30M gas in run_workflow(): run_workflow() built the transaction with gas=30000000 (30 million) unconditionally. This is wasteful (users overpay for gas) and can fail on networks with a lower block gas limit. Fix: call estimate_gas() first (consistent with infer()), then multiply by 3 for headroom. Fall back to 30M only if estimation itself fails. Also fixes new_workflow() deploy to use INFERENCE_TX_TIMEOUT constant instead of the previous hardcoded 60 seconds. --- src/opengradient/client/alpha.py | 27 +++++++++++++++++++++++---- 1 file changed, 23 insertions(+), 4 deletions(-) diff --git a/src/opengradient/client/alpha.py b/src/opengradient/client/alpha.py index a4e633e..967f277 100644 --- a/src/opengradient/client/alpha.py +++ b/src/opengradient/client/alpha.py @@ -119,9 +119,19 @@ def execute_transaction(): model_output = convert_to_model_output(parsed_logs[0]["args"]) if len(model_output) == 0: # check inference directly from node - parsed_logs = precompile_contract.events.ModelInferenceEvent().process_receipt(tx_receipt, errors=DISCARD) - inference_id = parsed_logs[0]["args"]["inferenceID"] + precompile_logs = precompile_contract.events.ModelInferenceEvent().process_receipt(tx_receipt, errors=DISCARD) + if not precompile_logs: + raise RuntimeError( + "ModelInferenceEvent not found in transaction logs. " + "Cannot fall back to node-side inference result." + ) + inference_id = precompile_logs[0]["args"]["inferenceID"] inference_result = self._get_inference_result_from_node(inference_id, inference_mode) + if inference_result is None: + raise RuntimeError( + f"Inference node returned no result for inference ID {inference_id!r}. 
" + "The result may not be available yet — retry after a short delay." + ) model_output = convert_to_model_output(inference_result) return InferenceResult(tx_hash.hex(), model_output) @@ -315,7 +325,7 @@ def deploy_transaction(): signed_txn = self._wallet_account.sign_transaction(transaction) tx_hash = self._blockchain.eth.send_raw_transaction(signed_txn.raw_transaction) - tx_receipt = self._blockchain.eth.wait_for_transaction_receipt(tx_hash, timeout=60) + tx_receipt = self._blockchain.eth.wait_for_transaction_receipt(tx_hash, timeout=INFERENCE_TX_TIMEOUT) if tx_receipt["status"] == 0: raise Exception(f"Contract deployment failed, transaction hash: {tx_hash.hex()}") @@ -419,11 +429,20 @@ def run_workflow(self, contract_address: str) -> ModelOutput: nonce = self._blockchain.eth.get_transaction_count(self._wallet_account.address, "pending") run_function = contract.functions.run() + + # Estimate gas instead of using a hardcoded 30M limit, which is wasteful + # and may exceed the block gas limit on some networks. + try: + estimated_gas = run_function.estimate_gas({"from": self._wallet_account.address}) + gas_limit = int(estimated_gas * 3) + except Exception: + gas_limit = 30000000 # Conservative fallback if estimation fails + transaction = run_function.build_transaction( { "from": self._wallet_account.address, "nonce": nonce, - "gas": 30000000, + "gas": gas_limit, "gasPrice": self._blockchain.eth.gas_price, "chainId": self._blockchain.eth.chain_id, }