Update backoff mechanism to back off only if the endpoint is unavailable
This commit is contained in:
parent
307abfc78f
commit
0dc3f060ef
@ -6,7 +6,7 @@ QUICKNODE_TOKEN=your_token_here
|
|||||||
|
|
||||||
# Proxy settings
|
# Proxy settings
|
||||||
PROXY_PORT=8545
|
PROXY_PORT=8545
|
||||||
CACHE_SIZE_GB=100
|
CACHE_SIZE_GB=1
|
||||||
BACKOFF_MINUTES=30
|
BACKOFF_MINUTES=30
|
||||||
|
|
||||||
# Logging
|
# Logging
|
||||||
|
3
cache.py
3
cache.py
@ -6,7 +6,7 @@ import diskcache
|
|||||||
|
|
||||||
|
|
||||||
class Cache:
|
class Cache:
|
||||||
def __init__(self, cache_dir: str = "./cache", size_limit_gb: int = 100):
|
def __init__(self, cache_dir: str = "./cache", size_limit_gb: int = 1):
|
||||||
self.cache_dir = cache_dir
|
self.cache_dir = cache_dir
|
||||||
self.size_limit_bytes = size_limit_gb * 1024 * 1024 * 1024
|
self.size_limit_bytes = size_limit_gb * 1024 * 1024 * 1024
|
||||||
self.cache = diskcache.Cache(
|
self.cache = diskcache.Cache(
|
||||||
@ -57,4 +57,3 @@ class Cache:
|
|||||||
"count": stats[0],
|
"count": stats[0],
|
||||||
"limit_gb": self.size_limit_bytes / (1024 * 1024 * 1024)
|
"limit_gb": self.size_limit_bytes / (1024 * 1024 * 1024)
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -44,14 +44,15 @@ class CachePolicy:
|
|||||||
Returns:
|
Returns:
|
||||||
True if the method should be cached, False otherwise
|
True if the method should be cached, False otherwise
|
||||||
"""
|
"""
|
||||||
if method in self.CACHEABLE_IMMUTABLE:
|
|
||||||
|
if method in self.CACHEABLE_WITH_TTL:
|
||||||
# For getBlock, only cache finalized blocks
|
# For getBlock, only cache finalized blocks
|
||||||
if method == 'getBlock':
|
if method == 'getBlock':
|
||||||
commitment = self._get_commitment(params)
|
commitment = self._get_commitment(params)
|
||||||
return commitment == 'finalized'
|
return commitment == 'finalized'
|
||||||
return True
|
return True
|
||||||
|
|
||||||
if method in self.CACHEABLE_WITH_TTL:
|
if method in self.CACHEABLE_IMMUTABLE:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# Default to not caching unknown methods
|
# Default to not caching unknown methods
|
||||||
|
@ -34,7 +34,7 @@ Provider class:
|
|||||||
Cache class:
|
Cache class:
|
||||||
- get(method: str, params: dict) -> Optional[response]
|
- get(method: str, params: dict) -> Optional[response]
|
||||||
- set(method: str, params: dict, response: dict) -> None
|
- set(method: str, params: dict, response: dict) -> None
|
||||||
- size_check() -> None # Enforce 100GB limit
|
- size_check() -> None # Enforce 1GB limit
|
||||||
- clear_oldest() -> None # LRU eviction
|
- clear_oldest() -> None # LRU eviction
|
||||||
```
|
```
|
||||||
|
|
||||||
@ -42,7 +42,7 @@ Cache class:
|
|||||||
- Use `diskcache` library for simplicity
|
- Use `diskcache` library for simplicity
|
||||||
- Key format: `f"{method}:{json.dumps(params, sort_keys=True)}"`
|
- Key format: `f"{method}:{json.dumps(params, sort_keys=True)}"`
|
||||||
- Store both HTTP responses and WebSocket messages
|
- Store both HTTP responses and WebSocket messages
|
||||||
- Implement 100GB limit with LRU eviction
|
- Implement 1GB limit with LRU eviction
|
||||||
|
|
||||||
### 3. Error Logger Module (`errors.py`)
|
### 3. Error Logger Module (`errors.py`)
|
||||||
**Purpose**: SQLite-based error logging with UUID tracking
|
**Purpose**: SQLite-based error logging with UUID tracking
|
||||||
@ -146,7 +146,7 @@ QUICKNODE_TOKEN=your_token_here
|
|||||||
|
|
||||||
# Proxy settings
|
# Proxy settings
|
||||||
PROXY_PORT=8545
|
PROXY_PORT=8545
|
||||||
CACHE_SIZE_GB=100
|
CACHE_SIZE_GB=1
|
||||||
BACKOFF_MINUTES=30
|
BACKOFF_MINUTES=30
|
||||||
|
|
||||||
# Logging
|
# Logging
|
||||||
@ -227,7 +227,7 @@ Happy-path end-to-end tests only:
|
|||||||
|
|
||||||
## Deployment Considerations
|
## Deployment Considerations
|
||||||
|
|
||||||
1. **Cache Storage**: Need ~100GB disk space
|
1. **Cache Storage**: Need ~1GB disk space
|
||||||
2. **Memory Usage**: Keep minimal, use disk cache
|
2. **Memory Usage**: Keep minimal, use disk cache
|
||||||
3. **Concurrent Clients**: Basic round-robin if multiple connect
|
3. **Concurrent Clients**: Basic round-robin if multiple connect
|
||||||
4. **Monitoring**: Log all errors, provide error IDs
|
4. **Monitoring**: Log all errors, provide error IDs
|
||||||
@ -273,7 +273,7 @@ aiohttp-cors==0.7.0
|
|||||||
|
|
||||||
1. Single endpoint proxies to 5 providers
|
1. Single endpoint proxies to 5 providers
|
||||||
2. Automatic failover works
|
2. Automatic failover works
|
||||||
3. Responses are cached (up to 100GB)
|
3. Responses are cached (up to 1GB)
|
||||||
4. Errors logged with retrievable IDs
|
4. Errors logged with retrievable IDs
|
||||||
5. Both HTTP and WebSocket work
|
5. Both HTTP and WebSocket work
|
||||||
6. Response format is unified
|
6. Response format is unified
|
||||||
|
4
main.py
4
main.py
@ -35,7 +35,7 @@ def load_config() -> dict:
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
"proxy_port": int(os.getenv("PROXY_PORT", 8545)),
|
"proxy_port": int(os.getenv("PROXY_PORT", 8545)),
|
||||||
"cache_size_gb": int(os.getenv("CACHE_SIZE_GB", 100)),
|
"cache_size_gb": int(os.getenv("CACHE_SIZE_GB", 1)),
|
||||||
"backoff_minutes": int(os.getenv("BACKOFF_MINUTES", 30)),
|
"backoff_minutes": int(os.getenv("BACKOFF_MINUTES", 30)),
|
||||||
"log_level": os.getenv("LOG_LEVEL", "INFO"),
|
"log_level": os.getenv("LOG_LEVEL", "INFO"),
|
||||||
"error_db_path": os.getenv("ERROR_DB_PATH", "./errors.db"),
|
"error_db_path": os.getenv("ERROR_DB_PATH", "./errors.db"),
|
||||||
@ -72,7 +72,7 @@ def main() -> None:
|
|||||||
|
|
||||||
logger = logging.getLogger(__name__)
|
logger = logging.getLogger(__name__)
|
||||||
logger.info(f"Starting Solana RPC Proxy on port {config['proxy_port']}")
|
logger.info(f"Starting Solana RPC Proxy on port {config['proxy_port']}")
|
||||||
logger.info(f"Intelligent caching enabled - Cache size limit: {config['cache_size_gb']}GB")
|
logger.info(f"Cache size limit: {config['cache_size_gb']}GB")
|
||||||
logger.info(f"Provider backoff time: {config['backoff_minutes']} minutes")
|
logger.info(f"Provider backoff time: {config['backoff_minutes']} minutes")
|
||||||
|
|
||||||
app = create_app(config)
|
app = create_app(config)
|
||||||
|
31
router.py
31
router.py
@ -20,7 +20,7 @@ class Router:
|
|||||||
async def route_request(self, method: str, params: Dict[str, Any]) -> Dict[str, Any]:
|
async def route_request(self, method: str, params: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
request = {"method": method, "params": params}
|
request = {"method": method, "params": params}
|
||||||
|
|
||||||
# Check if this method should be cached based on intelligent caching policy
|
# Check if this method should be cached based on caching policy
|
||||||
should_cache = self.cache_policy.should_cache(method, params)
|
should_cache = self.cache_policy.should_cache(method, params)
|
||||||
|
|
||||||
if should_cache:
|
if should_cache:
|
||||||
@ -60,7 +60,13 @@ class Router:
|
|||||||
except Exception as error:
|
except Exception as error:
|
||||||
error_id = self.error_logger.log_error(provider.name, request, error)
|
error_id = self.error_logger.log_error(provider.name, request, error)
|
||||||
self.logger.warning(f"Provider {provider.name} failed: {error} (ID: {error_id})")
|
self.logger.warning(f"Provider {provider.name} failed: {error} (ID: {error_id})")
|
||||||
|
|
||||||
|
# Only mark provider as failed for server/network issues, not RPC errors
|
||||||
|
if await self._is_server_failure(provider, error):
|
||||||
provider.mark_failed()
|
provider.mark_failed()
|
||||||
|
self.logger.warning(f"Provider {provider.name} marked as failed due to server issue")
|
||||||
|
else:
|
||||||
|
self.logger.debug(f"Provider {provider.name} had RPC error but server is available")
|
||||||
|
|
||||||
return self._create_error_response(
|
return self._create_error_response(
|
||||||
"All providers failed to handle the request",
|
"All providers failed to handle the request",
|
||||||
@ -77,6 +83,29 @@ class Router:
|
|||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
async def _is_server_failure(self, provider: Provider, error: Exception) -> bool:
    """
    Decide whether a failed request means the provider's server is unreachable.

    Sends a single GET to the root of the provider's HTTP endpoint (scheme and
    host only, no path), with the whole probe limited to five seconds.

    Args:
        provider: Provider whose ``http_url`` is probed and whose ``name`` is
            used in the debug log message.
        error: The exception raised by the failed request. It is accepted for
            the caller's convenience but is not consulted; only the probe
            decides the result.

    Returns:
        False when the probe gets an HTTP response back (the status code is
        not inspected), True when the probe raises any exception.
    """
    try:
        from urllib.parse import urlparse

        # Reduce the RPC URL to its origin so the probe does not hit the RPC path.
        parsed_url = urlparse(provider.http_url)
        health_url = f"{parsed_url.scheme}://{parsed_url.netloc}"

        probe_timeout = aiohttp.ClientTimeout(total=5)
        async with aiohttp.ClientSession(timeout=probe_timeout) as session, session.get(health_url):
            # Getting this far means the server answered; the body is never read.
            pass
    except Exception as health_error:
        # Any failure while probing is treated as the server being down.
        self.logger.debug(f"Health check failed for {provider.name}: {health_error}")
        return True

    return False
|
||||||
|
|
||||||
async def _make_request(self, provider: Provider, request: Dict[str, Any]) -> Dict[str, Any]:
|
async def _make_request(self, provider: Provider, request: Dict[str, Any]) -> Dict[str, Any]:
|
||||||
transformed_request = provider.transform_request(request)
|
transformed_request = provider.transform_request(request)
|
||||||
|
|
||||||
|
Loading…
Reference in New Issue
Block a user