Update backoff mechanism to back off only when the endpoint is unavailable
This commit is contained in:
parent
307abfc78f
commit
0dc3f060ef
@ -6,7 +6,7 @@ QUICKNODE_TOKEN=your_token_here
|
||||
|
||||
# Proxy settings
|
||||
PROXY_PORT=8545
|
||||
CACHE_SIZE_GB=100
|
||||
CACHE_SIZE_GB=1
|
||||
BACKOFF_MINUTES=30
|
||||
|
||||
# Logging
|
||||
|
3
cache.py
3
cache.py
@ -6,7 +6,7 @@ import diskcache
|
||||
|
||||
|
||||
class Cache:
|
||||
def __init__(self, cache_dir: str = "./cache", size_limit_gb: int = 100):
|
||||
def __init__(self, cache_dir: str = "./cache", size_limit_gb: int = 1):
|
||||
self.cache_dir = cache_dir
|
||||
self.size_limit_bytes = size_limit_gb * 1024 * 1024 * 1024
|
||||
self.cache = diskcache.Cache(
|
||||
@ -57,4 +57,3 @@ class Cache:
|
||||
"count": stats[0],
|
||||
"limit_gb": self.size_limit_bytes / (1024 * 1024 * 1024)
|
||||
}
|
||||
|
||||
|
@ -44,14 +44,15 @@ class CachePolicy:
|
||||
Returns:
|
||||
True if the method should be cached, False otherwise
|
||||
"""
|
||||
if method in self.CACHEABLE_IMMUTABLE:
|
||||
|
||||
if method in self.CACHEABLE_WITH_TTL:
|
||||
# For getBlock, only cache finalized blocks
|
||||
if method == 'getBlock':
|
||||
commitment = self._get_commitment(params)
|
||||
return commitment == 'finalized'
|
||||
return True
|
||||
|
||||
if method in self.CACHEABLE_WITH_TTL:
|
||||
if method in self.CACHEABLE_IMMUTABLE:
|
||||
return True
|
||||
|
||||
# Default to not caching unknown methods
|
||||
|
@ -34,7 +34,7 @@ Provider class:
|
||||
Cache class:
|
||||
- get(method: str, params: dict) -> Optional[response]
|
||||
- set(method: str, params: dict, response: dict) -> None
|
||||
- size_check() -> None # Enforce 100GB limit
|
||||
- size_check() -> None # Enforce 1GB limit
|
||||
- clear_oldest() -> None # LRU eviction
|
||||
```
|
||||
|
||||
@ -42,7 +42,7 @@ Cache class:
|
||||
- Use `diskcache` library for simplicity
|
||||
- Key format: `f"{method}:{json.dumps(params, sort_keys=True)}"`
|
||||
- Store both HTTP responses and WebSocket messages
|
||||
- Implement 100GB limit with LRU eviction
|
||||
- Implement 1GB limit with LRU eviction
|
||||
|
||||
### 3. Error Logger Module (`errors.py`)
|
||||
**Purpose**: SQLite-based error logging with UUID tracking
|
||||
@ -146,7 +146,7 @@ QUICKNODE_TOKEN=your_token_here
|
||||
|
||||
# Proxy settings
|
||||
PROXY_PORT=8545
|
||||
CACHE_SIZE_GB=100
|
||||
CACHE_SIZE_GB=1
|
||||
BACKOFF_MINUTES=30
|
||||
|
||||
# Logging
|
||||
@ -227,7 +227,7 @@ Happy-path end-to-end tests only:
|
||||
|
||||
## Deployment Considerations
|
||||
|
||||
1. **Cache Storage**: Need ~100GB disk space
|
||||
1. **Cache Storage**: Need ~1GB disk space
|
||||
2. **Memory Usage**: Keep minimal, use disk cache
|
||||
3. **Concurrent Clients**: Basic round-robin if multiple connect
|
||||
4. **Monitoring**: Log all errors, provide error IDs
|
||||
@ -273,7 +273,7 @@ aiohttp-cors==0.7.0
|
||||
|
||||
1. Single endpoint proxies to 5 providers
|
||||
2. Automatic failover works
|
||||
3. Responses are cached (up to 100GB)
|
||||
3. Responses are cached (up to 1GB)
|
||||
4. Errors logged with retrievable IDs
|
||||
5. Both HTTP and WebSocket work
|
||||
6. Response format is unified
|
||||
|
4
main.py
4
main.py
@ -35,7 +35,7 @@ def load_config() -> dict:
|
||||
|
||||
return {
|
||||
"proxy_port": int(os.getenv("PROXY_PORT", 8545)),
|
||||
"cache_size_gb": int(os.getenv("CACHE_SIZE_GB", 100)),
|
||||
"cache_size_gb": int(os.getenv("CACHE_SIZE_GB", 1)),
|
||||
"backoff_minutes": int(os.getenv("BACKOFF_MINUTES", 30)),
|
||||
"log_level": os.getenv("LOG_LEVEL", "INFO"),
|
||||
"error_db_path": os.getenv("ERROR_DB_PATH", "./errors.db"),
|
||||
@ -72,7 +72,7 @@ def main() -> None:
|
||||
|
||||
logger = logging.getLogger(__name__)
|
||||
logger.info(f"Starting Solana RPC Proxy on port {config['proxy_port']}")
|
||||
logger.info(f"Intelligent caching enabled - Cache size limit: {config['cache_size_gb']}GB")
|
||||
logger.info(f"Cache size limit: {config['cache_size_gb']}GB")
|
||||
logger.info(f"Provider backoff time: {config['backoff_minutes']} minutes")
|
||||
|
||||
app = create_app(config)
|
||||
|
31
router.py
31
router.py
@ -20,7 +20,7 @@ class Router:
|
||||
async def route_request(self, method: str, params: Dict[str, Any]) -> Dict[str, Any]:
|
||||
request = {"method": method, "params": params}
|
||||
|
||||
# Check if this method should be cached based on intelligent caching policy
|
||||
# Check if this method should be cached based on caching policy
|
||||
should_cache = self.cache_policy.should_cache(method, params)
|
||||
|
||||
if should_cache:
|
||||
@ -60,7 +60,13 @@ class Router:
|
||||
except Exception as error:
|
||||
error_id = self.error_logger.log_error(provider.name, request, error)
|
||||
self.logger.warning(f"Provider {provider.name} failed: {error} (ID: {error_id})")
|
||||
|
||||
# Only mark provider as failed for server/network issues, not RPC errors
|
||||
if await self._is_server_failure(provider, error):
|
||||
provider.mark_failed()
|
||||
self.logger.warning(f"Provider {provider.name} marked as failed due to server issue")
|
||||
else:
|
||||
self.logger.debug(f"Provider {provider.name} had RPC error but server is available")
|
||||
|
||||
return self._create_error_response(
|
||||
"All providers failed to handle the request",
|
||||
@ -77,6 +83,29 @@ class Router:
|
||||
|
||||
return None
|
||||
|
||||
async def _is_server_failure(self, provider: Provider, error: Exception) -> bool:
    """
    Probe the provider's host to decide whether it is unreachable.

    Issues a single HTTP GET against the scheme and host portion of
    ``provider.http_url`` with a 5 second total timeout. Receiving a
    response counts as "server is alive"; the response status is not
    inspected. Any exception raised while probing counts as
    "server is down".

    Args:
        provider: Provider whose ``http_url`` and ``name`` are used for the probe.
        error: The exception that triggered this check. It is accepted for
            the caller's convenience but is not consulted here.

    Returns:
        True if the probe raised (provider should be treated as failed),
        False if the server answered.
    """
    from urllib.parse import urlparse

    try:
        # Keep the probe short so a dead provider does not stall the request path.
        probe_timeout = aiohttp.ClientTimeout(total=5)
        async with aiohttp.ClientSession(timeout=probe_timeout) as session:
            # Hit only the scheme://host root, not the full RPC path.
            url_parts = urlparse(provider.http_url)
            health_url = f"{url_parts.scheme}://{url_parts.netloc}"

            async with session.get(health_url):
                # Getting here means the server answered; nothing else to read.
                pass
    except Exception as health_error:
        # The probe itself failed, so the server is treated as unreachable.
        self.logger.debug(f"Health check failed for {provider.name}: {health_error}")
        return True

    return False
|
||||
|
||||
async def _make_request(self, provider: Provider, request: Dict[str, Any]) -> Dict[str, Any]:
|
||||
transformed_request = provider.transform_request(request)
|
||||
|
||||
|
Loading…
Reference in New Issue
Block a user