- Add synthetic.new skill (primary AI provider) - Add z.ai skill (fallback with GLM models) - Add lean backlog management skill with WSJF prioritization - Add lean prioritization skill with scheduling/parallelization - Add WWS serverless architecture overview 🤖 Generated with [Claude Code](https://claude.com/claude-code) Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
332 lines
9.2 KiB
Markdown
332 lines
9.2 KiB
Markdown
# Skill: WWS (Serverless Functions) Overview
|
|
|
|
## Description
|
|
Architecture and implementation guide for WWS (Web Worker Services) - serverless functions for edge computing in the Mylder platform.
|
|
|
|
## What is WWS?
|
|
WWS is our serverless function layer that runs lightweight compute at the edge, reducing VPS load and improving response times. It is similar to Cloudflare Workers but platform-agnostic.
|
|
|
|
## Architecture
|
|
```
|
|
┌─────────────────────────────────────────────────────────────────┐
|
|
│ USER REQUEST │
|
|
└─────────────────────────────────────────────────────────────────┘
|
|
│
|
|
▼
|
|
┌─────────────────────────────────────────────────────────────────┐
|
|
│ EDGE LAYER (WWS) │
|
|
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
|
|
│ │ Auth │ │ Rate │ │ Model │ │ Cache │ │
|
|
│ │ Guard │ │ Limiter │ │ Router │ │ Manager │ │
|
|
│ └──────────┘ └──────────┘ └──────────┘ └──────────┘ │
|
|
└─────────────────────────────────────────────────────────────────┘
|
|
│
|
|
┌──────────────────┼──────────────────┐
|
|
│ │ │
|
|
▼ ▼ ▼
|
|
┌──────────┐ ┌──────────┐ ┌──────────┐
|
|
│synthetic │ │ z.ai │ │ Origin │
|
|
│ .new │ │(fallback)│ │ VPS │
|
|
└──────────┘ └──────────┘ └──────────┘
|
|
```
|
|
|
|
## Core Functions
|
|
|
|
### 1. Auth Guard
|
|
```javascript
|
|
// Edge authentication validation
|
|
/**
 * Validates the bearer token on an incoming request.
 *
 * Lookup order:
 *   1. `env.SESSIONS` (KV) — a hit returns the cached user with `source: 'cache'`.
 *   2. `validateWithSupabase(token, env)` — a hit is written back to KV for
 *      one hour and returned with `source: 'origin'`.
 *
 * @param request Incoming request; only the `Authorization` header is read.
 * @param env     Bindings object; must expose a `SESSIONS` KV namespace.
 * @returns `{ valid: true, user, source }` on success, `{ valid: false }` otherwise.
 */
export async function authGuard(request, env) {
  // Require a well-formed "Bearer <token>" header. Without this guard a missing
  // or non-Bearer header produced an `undefined` token that was still sent to
  // KV and to Supabase.
  const header = request.headers.get('Authorization') ?? '';
  const match = /^Bearer\s+(\S+)\s*$/i.exec(header);
  if (!match) {
    return { valid: false };
  }
  const token = match[1];

  // Fast KV lookup
  const session = await env.SESSIONS.get(token);
  if (session) {
    try {
      return { valid: true, user: JSON.parse(session), source: 'cache' };
    } catch {
      // Corrupt cache entry: fall through to the origin check, which
      // overwrites it with a clean value on success.
    }
  }

  // Fallback to Supabase
  const supabaseUser = await validateWithSupabase(token, env);
  if (supabaseUser) {
    await env.SESSIONS.put(token, JSON.stringify(supabaseUser), {
      expirationTtl: 3600 // 1 hour
    });
    return { valid: true, user: supabaseUser, source: 'origin' };
  }

  return { valid: false };
}
|
|
```
|
|
|
|
### 2. Rate Limiter
|
|
```javascript
|
|
// Per-user, per-endpoint rate limiting
|
|
/**
 * Fixed-window rate limiter keyed by user, endpoint and one-minute window.
 *
 * The counter key embeds the window number, so a new window always starts from
 * zero. Previously every accepted request rewrote a single key with a fresh
 * 60 s TTL, which meant the counter never expired under steady traffic.
 *
 * The KV read and write are two separate calls, so concurrent requests can
 * race; treat the limit as approximate.
 *
 * NOTE(review): `X-User-ID` is read straight from the request. Confirm it is
 * set by a trusted upstream layer rather than by the client.
 *
 * @param request Incoming request; reads the `X-User-ID` header and the URL path.
 * @param env     Bindings object; must expose a `RATE_LIMITS` KV namespace.
 * @returns `{ allowed: true, remaining }`, or
 *          `{ allowed: false, retryAfter, remaining: 0 }` where `retryAfter`
 *          is the number of seconds until the current window ends.
 */
export async function rateLimiter(request, env) {
  const windowSeconds = 60;

  const limits = {
    '/api/ai/chat': 60,      // 60 requests per minute
    '/api/ai/generate': 20,  // 20 per minute
    'default': 100           // 100 per minute
  };

  const userId = request.headers.get('X-User-ID') || 'anonymous';
  const endpoint = new URL(request.url).pathname;
  const limit = limits[endpoint] || limits.default;

  const nowSeconds = Math.floor(Date.now() / 1000);
  const windowId = Math.floor(nowSeconds / windowSeconds);
  const key = `rate:${userId}:${endpoint}:${windowId}`;

  // A missing or non-numeric stored value counts as zero. A NaN count would
  // otherwise never satisfy `count >= limit` and silently disable the limiter.
  const parsed = parseInt((await env.RATE_LIMITS.get(key)) ?? '0', 10);
  const count = Number.isNaN(parsed) ? 0 : parsed;

  if (count >= limit) {
    return {
      allowed: false,
      retryAfter: windowSeconds - (nowSeconds % windowSeconds),
      remaining: 0
    };
  }

  // The TTL is only for cleanup: the key is never read again once its window
  // has passed. Two windows' worth keeps it alive for the whole current window
  // no matter when in the window it is written.
  await env.RATE_LIMITS.put(key, String(count + 1), {
    expirationTtl: windowSeconds * 2
  });

  return {
    allowed: true,
    remaining: limit - count - 1
  };
}
|
|
```
|
|
|
|
### 3. Model Router
|
|
```javascript
|
|
// Intelligent AI model routing
|
|
/**
 * Picks the AI provider and model for a task.
 *
 * - Tasks whose `context_length` exceeds 128K go to the z.ai fallback's
 *   long-context model.
 * - Everything else goes to synthetic.new: the `code` model for `code`,
 *   `implementation` and `debugging` task types, the `reasoning` model otherwise.
 *
 * @param task Task descriptor; reads `type` and `context_length`. A missing
 *             task is treated as an empty one and routed to the default
 *             reasoning model.
 * @param env  Bindings object (currently unused; kept for signature compatibility).
 * @returns The chosen provider config spread together with `model` and `reason`.
 */
export async function modelRouter(task, env) {
  const longContextThreshold = 128000;
  const codeTaskTypes = ['code', 'implementation', 'debugging'];

  // `task ?? {}` keeps a null/undefined task from throwing on destructure.
  const { type, context_length } = task ?? {};

  // Route based on task characteristics
  const routing = {
    // Primary: synthetic.new
    primary: {
      provider: 'synthetic.new',
      base_url: 'https://api.synthetic.new/openai/v1',
      models: {
        code: 'hf:deepseek-ai/DeepSeek-V3',
        reasoning: 'hf:moonshotai/Kimi-K2-Thinking'
      }
    },
    // Fallback: z.ai
    fallback: {
      provider: 'z.ai',
      base_url: 'https://api.z.ai/v1',
      models: {
        code: 'glm-4-flash',
        reasoning: 'glm-4-plus',
        long_context: 'glm-4-long'
      }
    }
  };

  // Special case for z.ai: only the fallback provider lists a long-context model.
  if (context_length > longContextThreshold) {
    return {
      ...routing.fallback,
      model: routing.fallback.models.long_context,
      reason: 'Context exceeds 128K, using GLM-4-Long'
    };
  }

  // Default: synthetic.new
  const modelType = codeTaskTypes.includes(type) ? 'code' : 'reasoning';

  return {
    ...routing.primary,
    model: routing.primary.models[modelType],
    reason: `Standard ${modelType} task`
  };
}
|
|
```
|
|
|
|
### 4. Cache Manager
|
|
```javascript
|
|
// Stale-while-revalidate caching
|
|
/**
 * Stale-while-revalidate cache in front of `handler`.
 *
 * - Entry younger than 1 minute: returned as-is (`source: 'cache'`).
 * - Entry between 1 and 5 minutes old: returned immediately
 *   (`source: 'stale'`) while `handler` is re-run in the background.
 * - No entry, or older than 5 minutes: `handler` is awaited and its result
 *   stored (`source: 'origin'`).
 *
 * The cache key is the URL path plus query string, so requests that differ
 * only in their query parameters no longer share an entry.
 *
 * NOTE(review): the key does not include the caller's identity. Confirm this
 * is only used for responses that are safe to share between users.
 *
 * @param request Incoming request; only `request.url` is read.
 * @param env     Bindings object; must expose a `RESPONSE_CACHE` KV namespace.
 *                If `env.ctx.waitUntil` is present it is used to keep the
 *                background revalidation alive.
 * @param handler Zero-argument async function producing the fresh response data.
 * @returns `{ data, source, age }` with `age` in milliseconds.
 */
export async function cacheManager(request, env, handler) {
  const maxAge = 60000;    // 1 minute
  const staleAge = 300000; // 5 minutes

  const { pathname, search } = new URL(request.url);
  const cacheKey = pathname + search;

  // Check cache
  const cached = await env.RESPONSE_CACHE.get(cacheKey, { type: 'json' });

  if (cached) {
    const age = Date.now() - cached.timestamp;

    // Fresh cache
    if (age < maxAge) {
      return { data: cached.data, source: 'cache', age };
    }

    // Stale-while-revalidate
    if (age < staleAge) {
      // Return stale, revalidate in background. A failed refresh must not
      // become an unhandled rejection; the stale entry simply stays in place.
      const refresh = revalidate(cacheKey, handler, env).catch((err) => {
        console.error('cache revalidation failed', err);
      });
      // `ctx` is optional: without it the refresh still runs, it just is not
      // registered with the runtime.
      env.ctx?.waitUntil?.(refresh);
      return { data: cached.data, source: 'stale', age };
    }
  }

  // No usable cache entry, fetch fresh
  const fresh = await handler();
  await writeCacheEntry(cacheKey, fresh, env);

  return { data: fresh, source: 'origin', age: 0 };
}

/** Re-runs `handler` and overwrites the cache entry stored under `key`. */
async function revalidate(key, handler, env) {
  const fresh = await handler();
  await writeCacheEntry(key, fresh, env);
}

/** Stores `data` under `key`, stamped with the current time. */
async function writeCacheEntry(key, data, env) {
  await env.RESPONSE_CACHE.put(key, JSON.stringify({
    data,
    timestamp: Date.now()
  }), {
    // Entries older than the 5-minute stale window are never served, so let
    // KV drop them instead of accumulating forever.
    expirationTtl: 300
  });
}
|
|
```
|
|
|
|
## Deployment Options
|
|
|
|
### Option 1: Cloudflare Workers
|
|
```toml
|
|
# wrangler.toml
|
|
name = "mylder-wws"
|
|
main = "src/index.ts"
|
|
compatibility_date = "2024-12-01"
|
|
|
|
[[kv_namespaces]]
|
|
binding = "SESSIONS"
|
|
id = "xxx"
|
|
|
|
[[kv_namespaces]]
|
|
binding = "RATE_LIMITS"
|
|
id = "xxx"
|
|
|
|
[[kv_namespaces]]
|
|
binding = "RESPONSE_CACHE"
|
|
id = "xxx"
|
|
```
|
|
|
|
### Option 2: Deno Deploy
|
|
```typescript
|
|
// main.ts
|
|
// Entry point: serves HTTP on port 8000 and forwards every /api/ai/* request
// to handleAIRequest; anything else gets a 404.
//
// Uses the runtime's built-in Deno.serve instead of importing `serve` from an
// unpinned https://deno.land/std URL, so the script needs no remote module
// and behaves the same on every deploy.
Deno.serve({ port: 8000 }, async (req) => {
  const url = new URL(req.url);

  if (url.pathname.startsWith('/api/ai/')) {
    return handleAIRequest(req);
  }

  return new Response('Not Found', { status: 404 });
});
|
|
```
|
|
|
|
### Option 3: Self-Hosted (Docker)
|
|
```dockerfile
|
|
# Dockerfile.wws
|
|
FROM denoland/deno:1.38.0
|
|
|
|
WORKDIR /app
|
|
COPY . .
|
|
|
|
RUN deno cache main.ts
|
|
|
|
EXPOSE 8000
|
|
CMD ["deno", "run", "--allow-net", "--allow-env", "main.ts"]
|
|
```
|
|
|
|
## Integration Points
|
|
|
|
### n8n Workflow Integration
|
|
```json
|
|
{
|
|
"name": "WWS Call",
|
|
"type": "n8n-nodes-base.httpRequest",
|
|
"parameters": {
|
|
"method": "POST",
|
|
"url": "https://wws.mylder.io/api/ai/chat",
|
|
"headers": {
|
|
"Authorization": "Bearer {{ $env.WWS_API_KEY }}",
|
|
"X-Task-Type": "{{ $json.taskType }}"
|
|
},
|
|
"body": {
|
|
"messages": "{{ $json.messages }}",
|
|
"context": "{{ $json.context }}"
|
|
}
|
|
}
|
|
}
|
|
```
|
|
|
|
### Frontend Integration
|
|
```typescript
|
|
// lib/wws.ts
|
|
/**
 * POSTs `data` as JSON to a WWS endpoint using the current session's bearer token.
 *
 * @param endpoint Path appended to `NEXT_PUBLIC_WWS_URL` (e.g. `/api/ai/chat`).
 * @param data     JSON-serialisable request payload.
 * @returns The parsed JSON response body.
 * @throws RateLimitError when the service answers 429; carries the raw
 *         `Retry-After` header value (or `null` if the header is absent).
 */
export async function callWWS(endpoint: string, data: unknown) {
  const response = await fetch(`${process.env.NEXT_PUBLIC_WWS_URL}${endpoint}`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${getSession().token}`
    },
    body: JSON.stringify(data)
  });

  // Handle rate limiting before touching the body: a 429 is not guaranteed to
  // carry JSON, and parsing first would surface a SyntaxError instead of the
  // RateLimitError callers are expected to catch.
  if (response.status === 429) {
    const retryAfter = response.headers.get('Retry-After');
    throw new RateLimitError(retryAfter);
  }

  return await response.json();
}
|
|
```
|
|
|
|
## Latency Targets
|
|
| Operation | Target | Fallback |
|
|
|-----------|--------|----------|
|
|
| Auth validation | < 10ms | < 300ms (origin) |
|
|
| Rate limit check | < 5ms | N/A |
|
|
| Model routing | < 2ms | N/A |
|
|
| Cache hit | < 10ms | N/A |
|
|
| AI request (primary) | < 5s | < 10s (fallback) |
|
|
|
|
## Cost Model
|
|
```
|
|
Cloudflare Workers:
|
|
- Free: 100K requests/day
|
|
- Paid: $5/month + $0.50/million requests
|
|
|
|
KV Storage:
|
|
- Free: 100K reads/day, 1K writes/day
|
|
- Paid: $0.50/million reads, $5/million writes
|
|
|
|
Estimated (1M users/month):
|
|
- Workers: ~$5/month
|
|
- KV: ~$25/month
|
|
- Total: ~$30/month
|
|
```
|
|
|
|
## Related Skills
|
|
- `wws/edge-auth.md` - Detailed auth implementation
|
|
- `wws/rate-limit.md` - Rate limiting patterns
|
|
- `wws/model-router.md` - AI model selection
|
|
- `ai-providers/synthetic-new.md` - Primary AI provider
|
|
- `ai-providers/z-ai.md` - Fallback provider
|
|
|
|
## Token Budget
|
|
- Max input: 500 tokens
|
|
- Max output: 1200 tokens
|
|
|
|
## Model
|
|
- Recommended: sonnet (architecture understanding)
|