Add AI providers, lean methodologies, and WWS skills

- Add synthetic.new skill (primary AI provider)
- Add z.ai skill (fallback with GLM models)
- Add lean backlog management skill with WSJF prioritization
- Add lean prioritization skill with scheduling/parallelization
- Add WWS serverless architecture overview

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <noreply@anthropic.com>
skills/wws/overview.md · 331 lines · new file

@@ -0,0 +1,331 @@
# Skill: WWS (Serverless Functions) Overview

## Description

Architecture and implementation guide for WWS (Web Worker Services) - serverless functions for edge computing in the Mylder platform.

## What is WWS?

WWS is our serverless function layer that runs lightweight compute at the edge, reducing VPS load and improving response times. Similar to Cloudflare Workers but platform-agnostic.

## Architecture

```
┌────────────────────────────────────────────────────────────────┐
│                          USER REQUEST                          │
└────────────────────────────────────────────────────────────────┘
                                │
                                ▼
┌────────────────────────────────────────────────────────────────┐
│                        EDGE LAYER (WWS)                        │
│  ┌──────────┐  ┌──────────┐  ┌──────────┐  ┌──────────┐        │
│  │  Auth    │  │  Rate    │  │  Model   │  │  Cache   │        │
│  │  Guard   │  │  Limiter │  │  Router  │  │  Manager │        │
│  └──────────┘  └──────────┘  └──────────┘  └──────────┘        │
└────────────────────────────────────────────────────────────────┘
                                │
             ┌──────────────────┼──────────────────┐
             │                  │                  │
             ▼                  ▼                  ▼
        ┌──────────┐       ┌──────────┐       ┌──────────┐
        │synthetic │       │   z.ai   │       │  Origin  │
        │   .new   │       │(fallback)│       │   VPS    │
        └──────────┘       └──────────┘       └──────────┘
```
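The four components run in order on every request before anything reaches an AI provider or the origin VPS. A minimal sketch of that pipeline is below; the worker scaffolding and the `handleWithProvider` helper are illustrative assumptions, while `authGuard`, `rateLimiter`, `modelRouter` and `cacheManager` are documented under Core Functions.

```javascript
// Sketch: the edge pipeline in request order (not a normative implementation).
export default {
  async fetch(request, env, ctx) {
    env.ctx = ctx; // cacheManager (below) expects the execution context on env

    const auth = await authGuard(request, env);
    if (!auth.valid) {
      return new Response('Unauthorized', { status: 401 });
    }

    const rate = await rateLimiter(request, env);
    if (!rate.allowed) {
      return new Response('Too Many Requests', {
        status: 429,
        headers: { 'Retry-After': String(rate.retryAfter) }
      });
    }

    // handleWithProvider is a hypothetical helper that uses modelRouter
    // to pick synthetic.new / z.ai, or falls through to the origin VPS.
    const result = await cacheManager(request, env, () => handleWithProvider(request, env));

    return new Response(JSON.stringify(result.data), {
      headers: { 'Content-Type': 'application/json', 'X-Cache': result.source }
    });
  }
};
```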
## Core Functions

### 1. Auth Guard

```javascript
// Edge authentication validation
export async function authGuard(request, env) {
  const token = request.headers.get('Authorization')?.split('Bearer ')[1];
  if (!token) {
    // Missing or malformed Authorization header
    return { valid: false };
  }

  // Fast KV lookup
  const session = await env.SESSIONS.get(token);
  if (session) {
    return { valid: true, user: JSON.parse(session), source: 'cache' };
  }

  // Fallback to Supabase
  const supabaseUser = await validateWithSupabase(token, env);
  if (supabaseUser) {
    await env.SESSIONS.put(token, JSON.stringify(supabaseUser), {
      expirationTtl: 3600 // 1 hour
    });
    return { valid: true, user: supabaseUser, source: 'origin' };
  }

  return { valid: false };
}
```
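`validateWithSupabase` is not defined in this skill (see `wws/edge-auth.md`). A minimal sketch, assuming the Supabase URL and anon key are exposed on `env` under illustrative names:

```javascript
// Sketch only: verify the JWT against Supabase Auth's user endpoint.
// env.SUPABASE_URL and env.SUPABASE_ANON_KEY are assumed bindings.
async function validateWithSupabase(token, env) {
  const response = await fetch(`${env.SUPABASE_URL}/auth/v1/user`, {
    headers: {
      apikey: env.SUPABASE_ANON_KEY,
      Authorization: `Bearer ${token}`
    }
  });

  if (!response.ok) {
    return null; // invalid or expired token
  }

  return response.json(); // user object, cached by authGuard above
}
```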
### 2. Rate Limiter

```javascript
// Per-user, per-endpoint rate limiting
export async function rateLimiter(request, env) {
  const userId = request.headers.get('X-User-ID') || 'anonymous';
  const endpoint = new URL(request.url).pathname;
  const key = `rate:${userId}:${endpoint}`;

  const current = await env.RATE_LIMITS.get(key) || '0';
  const count = parseInt(current, 10);

  const limits = {
    '/api/ai/chat': 60,      // 60 requests per minute
    '/api/ai/generate': 20,  // 20 per minute
    'default': 100           // 100 per minute
  };

  const limit = limits[endpoint] || limits.default;

  if (count >= limit) {
    return {
      allowed: false,
      retryAfter: 60,
      remaining: 0
    };
  }

  await env.RATE_LIMITS.put(key, String(count + 1), {
    expirationTtl: 60
  });

  return {
    allowed: true,
    remaining: limit - count - 1
  };
}
```
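As a sketch of how the result might be consumed (not prescribed by this skill): the worker can turn `allowed: false` into a 429 with a `Retry-After` header, which is what the frontend integration further down expects, and echo the remaining quota on successful responses.

```javascript
// Sketch: wrapping a downstream handler with the rate limiter.
async function withRateLimit(request, env, next) {
  const result = await rateLimiter(request, env);

  if (!result.allowed) {
    return new Response(JSON.stringify({ error: 'rate_limited' }), {
      status: 429,
      headers: {
        'Content-Type': 'application/json',
        'Retry-After': String(result.retryAfter)
      }
    });
  }

  const response = await next(request);
  const headers = new Headers(response.headers);
  headers.set('X-RateLimit-Remaining', String(result.remaining));
  return new Response(response.body, { status: response.status, headers });
}
```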
### 3. Model Router

```javascript
// Intelligent AI model routing
export async function modelRouter(task, env) {
  const { type, complexity, context_length } = task;

  // Route based on task characteristics
  const routing = {
    // Primary: synthetic.new
    primary: {
      provider: 'synthetic.new',
      base_url: 'https://api.synthetic.new/openai/v1',
      models: {
        code: 'hf:deepseek-ai/DeepSeek-V3',
        reasoning: 'hf:moonshotai/Kimi-K2-Thinking'
      }
    },
    // Fallback: z.ai
    fallback: {
      provider: 'z.ai',
      base_url: 'https://api.z.ai/v1',
      models: {
        code: 'glm-4-flash',
        reasoning: 'glm-4-plus',
        long_context: 'glm-4-long'
      }
    }
  };

  // Special cases for z.ai
  if (context_length > 128000) {
    return {
      ...routing.fallback,
      model: routing.fallback.models.long_context,
      reason: 'Context exceeds 128K, using GLM-4-Long'
    };
  }

  // Default: synthetic.new
  const modelType = ['code', 'implementation', 'debugging'].includes(type)
    ? 'code'
    : 'reasoning';

  return {
    ...routing.primary,
    model: routing.primary.models[modelType],
    reason: `Standard ${modelType} task`
  };
}
```
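The router only selects a provider and model; it does not issue the request. A hedged sketch of the call site, assuming both base URLs accept OpenAI-compatible `/chat/completions` requests and that API keys live in worker secrets with illustrative names:

```javascript
// Sketch: calling the provider chosen by modelRouter.
// SYNTHETIC_API_KEY / ZAI_API_KEY are assumed secret bindings, not part of this skill.
async function callModel(task, messages, env) {
  const route = await modelRouter(task, env);
  const apiKey = route.provider === 'synthetic.new'
    ? env.SYNTHETIC_API_KEY
    : env.ZAI_API_KEY;

  const response = await fetch(`${route.base_url}/chat/completions`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${apiKey}`
    },
    body: JSON.stringify({ model: route.model, messages })
  });

  if (!response.ok) {
    throw new Error(`${route.provider} returned ${response.status}`);
  }

  return response.json();
}
```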
### 4. Cache Manager

```javascript
// Stale-while-revalidate caching
export async function cacheManager(request, env, handler) {
  const cacheKey = new URL(request.url).pathname;

  // Check cache
  const cached = await env.RESPONSE_CACHE.get(cacheKey, { type: 'json' });

  if (cached) {
    const age = Date.now() - cached.timestamp;
    const maxAge = 60000;    // 1 minute
    const staleAge = 300000; // 5 minutes

    // Fresh cache
    if (age < maxAge) {
      return { data: cached.data, source: 'cache', age };
    }

    // Stale-while-revalidate
    if (age < staleAge) {
      // Return stale, revalidate in background
      env.ctx.waitUntil(revalidate(cacheKey, handler, env));
      return { data: cached.data, source: 'stale', age };
    }
  }

  // No cache, fetch fresh
  const fresh = await handler();
  await env.RESPONSE_CACHE.put(cacheKey, JSON.stringify({
    data: fresh,
    timestamp: Date.now()
  }));

  return { data: fresh, source: 'origin', age: 0 };
}

async function revalidate(key, handler, env) {
  const fresh = await handler();
  await env.RESPONSE_CACHE.put(key, JSON.stringify({
    data: fresh,
    timestamp: Date.now()
  }));
}
```
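Note that `cacheManager` reads the execution context from `env.ctx`, so the caller has to attach it (as in the pipeline sketch above). One possible cached endpoint, with the origin host as an illustrative placeholder:

```javascript
// Sketch: a cached read endpoint that proxies the origin VPS.
export default {
  async fetch(request, env, ctx) {
    env.ctx = ctx; // required for background revalidation via waitUntil

    const result = await cacheManager(request, env, async () => {
      // Origin host is illustrative; replace with the real VPS endpoint.
      const origin = await fetch(`https://origin.example.com${new URL(request.url).pathname}`);
      return origin.json();
    });

    return new Response(JSON.stringify(result.data), {
      headers: {
        'Content-Type': 'application/json',
        'X-Cache': result.source,      // 'cache' | 'stale' | 'origin'
        'X-Cache-Age': String(result.age)
      }
    });
  }
};
```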
## Deployment Options

### Option 1: Cloudflare Workers

```toml
# wrangler.toml
name = "mylder-wws"
main = "src/index.ts"
compatibility_date = "2024-12-01"

[[kv_namespaces]]
binding = "SESSIONS"
id = "xxx"

[[kv_namespaces]]
binding = "RATE_LIMITS"
id = "xxx"

[[kv_namespaces]]
binding = "RESPONSE_CACHE"
id = "xxx"
```
### Option 2: Deno Deploy

```typescript
// main.ts
import { serve } from "https://deno.land/std/http/server.ts";

serve(async (req) => {
  const url = new URL(req.url);

  if (url.pathname.startsWith('/api/ai/')) {
    return handleAIRequest(req);
  }

  return new Response('Not Found', { status: 404 });
}, { port: 8000 });
```
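The KV bindings used by the core functions are Cloudflare-specific. One way to run the same functions on Deno Deploy is to wrap Deno KV behind the same `get`/`put` interface; this adapter is an assumption for illustration, not something the skill prescribes.

```javascript
// Sketch: a Deno KV adapter exposing the env.SESSIONS / env.RATE_LIMITS /
// env.RESPONSE_CACHE interface expected by the core functions.
const kv = await Deno.openKv();

function kvNamespace(prefix) {
  return {
    async get(key, opts = {}) {
      const entry = await kv.get([prefix, key]);
      if (entry.value == null) return null;
      return opts.type === 'json' ? JSON.parse(entry.value) : entry.value;
    },
    async put(key, value, opts = {}) {
      await kv.set([prefix, key], value, {
        expireIn: opts.expirationTtl ? opts.expirationTtl * 1000 : undefined
      });
    }
  };
}

const env = {
  SESSIONS: kvNamespace('sessions'),
  RATE_LIMITS: kvNamespace('rate'),
  RESPONSE_CACHE: kvNamespace('cache')
};
```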
### Option 3: Self-Hosted (Docker)

```dockerfile
# Dockerfile.wws
FROM denoland/deno:1.38.0

WORKDIR /app
COPY . .

RUN deno cache main.ts

EXPOSE 8000
CMD ["deno", "run", "--allow-net", "--allow-env", "main.ts"]
```
## Integration Points

### n8n Workflow Integration

```json
{
  "name": "WWS Call",
  "type": "n8n-nodes-base.httpRequest",
  "parameters": {
    "method": "POST",
    "url": "https://wws.mylder.io/api/ai/chat",
    "headers": {
      "Authorization": "Bearer {{ $env.WWS_API_KEY }}",
      "X-Task-Type": "{{ $json.taskType }}"
    },
    "body": {
      "messages": "{{ $json.messages }}",
      "context": "{{ $json.context }}"
    }
  }
}
```
### Frontend Integration

```typescript
// lib/wws.ts
export async function callWWS(endpoint: string, data: any) {
  const response = await fetch(`${process.env.NEXT_PUBLIC_WWS_URL}${endpoint}`, {
    method: 'POST',
    headers: {
      'Content-Type': 'application/json',
      'Authorization': `Bearer ${getSession().token}`
    },
    body: JSON.stringify(data)
  });

  // Handle rate limiting before reading the body
  if (response.status === 429) {
    const retryAfter = response.headers.get('Retry-After');
    throw new RateLimitError(retryAfter);
  }

  return response.json();
}
```
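`RateLimitError` is not defined above; a minimal sketch (the shape is an assumption), plus one way a caller might back off using the server-provided delay:

```javascript
// Sketch: error type thrown by callWWS on HTTP 429.
export class RateLimitError extends Error {
  constructor(retryAfter) {
    super(`Rate limited, retry after ${retryAfter}s`);
    this.name = 'RateLimitError';
    this.retryAfter = Number(retryAfter) || 60; // seconds, from the Retry-After header
  }
}

// Usage: retry once after the indicated delay.
async function chatWithRetry(messages) {
  try {
    return await callWWS('/api/ai/chat', { messages });
  } catch (err) {
    if (err instanceof RateLimitError) {
      await new Promise((resolve) => setTimeout(resolve, err.retryAfter * 1000));
      return callWWS('/api/ai/chat', { messages });
    }
    throw err;
  }
}
```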
## Latency Targets

| Operation | Target | Fallback |
|-----------|--------|----------|
| Auth validation | < 10ms | < 300ms (origin) |
| Rate limit check | < 5ms | N/A |
| Model routing | < 2ms | N/A |
| Cache hit | < 10ms | N/A |
| AI request (primary) | < 5s | < 10s (fallback) |
## Cost Model

```
Cloudflare Workers:
- Free: 100K requests/day
- Paid: $5/month + $0.50/million requests

KV Storage:
- Free: 100K reads/day, 1K writes/day
- Paid: $0.50/million reads, $5/million writes

Estimated (1M users/month):
- Workers: ~$5/month
- KV: ~$25/month
- Total: ~$30/month
```
## Related Skills

- `wws/edge-auth.md` - Detailed auth implementation
- `wws/rate-limit.md` - Rate limiting patterns
- `wws/model-router.md` - AI model selection
- `ai-providers/synthetic-new.md` - Primary AI provider
- `ai-providers/z-ai.md` - Fallback provider
## Token Budget

- Max input: 500 tokens
- Max output: 1200 tokens

## Model

- Recommended: sonnet (architecture understanding)