Files
openclaw-market/scripts/generate-mock-data.ts
2026-03-16 08:52:44 +08:00

656 lines
22 KiB
TypeScript
Raw Permalink Blame History

This file contains invisible Unicode characters
This file contains invisible Unicode characters that are indistinguishable to humans but may be processed differently by a computer. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
/**
* 模拟数据生成脚本
* 生成约 240 只龙虾,分布在全球各个地区
*
* 使用方法: npx tsx scripts/generate-mock-data.ts
*/
import { db } from "@/lib/db";
import { claws, heartbeats, tasks, tokenUsage } from "@/lib/db/schema";
import { nanoid } from "nanoid";
import { sql } from "drizzle-orm";
// ============ Configuration ============
// Number of mock claws to generate per run.
const TOTAL_CLAWS = 240;
// Size of the look-back window in days: claw creation times are drawn from
// this window and token usage is generated for at most this many days.
const DAYS_BACK = 7;
// Base URL of the app, falling back to the local dev server.
// NOTE(review): not referenced anywhere else in the visible part of this script.
const APP_URL = process.env.NEXT_PUBLIC_APP_URL || "http://localhost:3000";
// ============ Region distribution data ============
// Relative weights per region (they sum to 100). Per the original author the
// proportions are meant to follow real-world AI usage.
// `countries` holds the country codes a claw in that region may be assigned to;
// every code listed here has an entry in CITIES_DATA and COUNTRY_NAMES.
const REGION_DISTRIBUTION = [
{ region: "Americas", weight: 35, countries: ["US", "CA", "BR", "MX", "AR"] },
{ region: "Asia", weight: 30, countries: ["CN", "JP", "KR", "IN", "SG", "HK", "TW"] },
{ region: "Europe", weight: 28, countries: ["GB", "DE", "FR", "NL", "SE", "PL", "IT", "ES"] },
{ region: "Oceania", weight: 5, countries: ["AU", "NZ"] },
{ region: "Africa", weight: 2, countries: ["ZA", "NG", "KE", "EG"] },
];
// City data: country code -> list of cities.
// Each city carries a single latitude/longitude point (not a range); the
// generator adds a small random offset to it when placing a claw.
const CITIES_DATA: Record<string, { name: string; lat: number; lng: number }[]> = {
// Americas
US: [
{ name: "San Francisco", lat: 37.7749, lng: -122.4194 },
{ name: "New York", lat: 40.7128, lng: -74.006 },
{ name: "Seattle", lat: 47.6062, lng: -122.3321 },
{ name: "Los Angeles", lat: 34.0522, lng: -118.2437 },
{ name: "Boston", lat: 42.3601, lng: -71.0589 },
{ name: "Austin", lat: 30.2672, lng: -97.7431 },
{ name: "Chicago", lat: 41.8781, lng: -87.6298 },
{ name: "Denver", lat: 39.7392, lng: -104.9903 },
],
CA: [
{ name: "Toronto", lat: 43.6532, lng: -79.3832 },
{ name: "Vancouver", lat: 49.2827, lng: -123.1207 },
{ name: "Montreal", lat: 45.5017, lng: -73.5673 },
],
BR: [
{ name: "Sao Paulo", lat: -23.5505, lng: -46.6333 },
{ name: "Rio de Janeiro", lat: -22.9068, lng: -43.1729 },
],
MX: [{ name: "Mexico City", lat: 19.4326, lng: -99.1332 }],
AR: [{ name: "Buenos Aires", lat: -34.6037, lng: -58.3816 }],
// Asia
CN: [
{ name: "Beijing", lat: 39.9042, lng: 116.4074 },
{ name: "Shanghai", lat: 31.2304, lng: 121.4737 },
{ name: "Shenzhen", lat: 22.5431, lng: 114.0579 },
{ name: "Hangzhou", lat: 30.2741, lng: 120.1551 },
{ name: "Guangzhou", lat: 23.1291, lng: 113.2644 },
{ name: "Chengdu", lat: 30.5728, lng: 104.0668 },
],
JP: [
{ name: "Tokyo", lat: 35.6762, lng: 139.6503 },
{ name: "Osaka", lat: 34.6937, lng: 135.5023 },
],
KR: [{ name: "Seoul", lat: 37.5665, lng: 126.978 }],
IN: [
{ name: "Bangalore", lat: 12.9716, lng: 77.5946 },
{ name: "Mumbai", lat: 19.076, lng: 72.8777 },
{ name: "Delhi", lat: 28.7041, lng: 77.1025 },
],
SG: [{ name: "Singapore", lat: 1.3521, lng: 103.8198 }],
HK: [{ name: "Hong Kong", lat: 22.3193, lng: 114.1694 }],
TW: [{ name: "Taipei", lat: 25.033, lng: 121.5654 }],
// Europe
GB: [
{ name: "London", lat: 51.5074, lng: -0.1278 },
{ name: "Manchester", lat: 53.4808, lng: -2.2426 },
],
DE: [
{ name: "Berlin", lat: 52.52, lng: 13.405 },
{ name: "Munich", lat: 48.1351, lng: 11.582 },
{ name: "Frankfurt", lat: 50.1109, lng: 8.6821 },
],
FR: [{ name: "Paris", lat: 48.8566, lng: 2.3522 }],
NL: [
{ name: "Amsterdam", lat: 52.3676, lng: 4.9041 },
{ name: "Eindhoven", lat: 51.4416, lng: 5.4697 },
],
SE: [{ name: "Stockholm", lat: 59.3293, lng: 18.0686 }],
PL: [{ name: "Warsaw", lat: 52.2297, lng: 21.0122 }],
IT: [{ name: "Milan", lat: 45.4642, lng: 9.19 }],
ES: [{ name: "Madrid", lat: 40.4168, lng: -3.7038 }],
// Oceania
AU: [
{ name: "Sydney", lat: -33.8688, lng: 151.2093 },
{ name: "Melbourne", lat: -37.8136, lng: 144.9631 },
],
NZ: [{ name: "Auckland", lat: -36.8509, lng: 174.7645 }],
// Africa
ZA: [{ name: "Cape Town", lat: -33.9249, lng: 18.4241 }],
NG: [{ name: "Lagos", lat: 6.5244, lng: 3.3792 }],
KE: [{ name: "Nairobi", lat: -1.2921, lng: 36.8219 }],
EG: [{ name: "Cairo", lat: 30.0444, lng: 31.2357 }],
};
// Full country names, keyed by the same country codes used in
// REGION_DISTRIBUTION and CITIES_DATA. The generator falls back to the bare
// code when a code is missing from this table.
const COUNTRY_NAMES: Record<string, string> = {
US: "United States",
CA: "Canada",
BR: "Brazil",
MX: "Mexico",
AR: "Argentina",
CN: "China",
JP: "Japan",
KR: "South Korea",
IN: "India",
SG: "Singapore",
HK: "Hong Kong",
TW: "Taiwan",
GB: "United Kingdom",
DE: "Germany",
FR: "France",
NL: "Netherlands",
SE: "Sweden",
PL: "Poland",
IT: "Italy",
ES: "Spain",
AU: "Australia",
NZ: "New Zealand",
ZA: "South Africa",
NG: "Nigeria",
KE: "Kenya",
EG: "Egypt",
};
// ============ Name generation ============
// Word lists combined by generateName(). Entries within ADJECTIVES and within
// NOUNS must be distinct, otherwise two different indices map to the same name.
const ADJECTIVES = [
  "Swift", "Brave", "Clever", "Silent", "Mystic", "Noble", "Fierce", "Gentle",
  "Wise", "Bold", "Sharp", "Quick", "Calm", "Wild", "Bright", "Dark",
  "Golden", "Silver", "Iron", "Steel", "Crystal", "Shadow", "Storm", "Thunder",
  "Phoenix", "Dragon", "Tiger", "Eagle", "Wolf", "Bear", "Lion", "Falcon",
  "Cosmic", "Stellar", "Nova", "Quantum", "Cyber", "Neural", "Atomic", "Solar",
  "Arctic", "Desert", "Ocean", "Mountain", "Forest", "River", "Valley", "Peak",
];
const NOUNS = [
  "Coder", "Hacker", "Builder", "Maker", "Creator", "Developer", "Engineer",
  "Architect", "Designer", "Pioneer", "Explorer", "Seeker", "Hunter", "Guardian",
  "Sentinel", "Warrior", "Scholar", "Sage", "Wizard", "Alchemist", "Artisan",
  "Craftsman", "Visionary", "Innovator", "Strategist", "Analyst", "Researcher",
  "Navigator", "Pilot", "Captain", "Commander", "Champion", "Master", "Expert",
  // The last entry used to be a second "Pilot", which made two noun slots
  // produce identical names; it is now a distinct word.
  "Ninja", "Samurai", "Knight", "Ranger", "Scout", "Agent", "Operator", "Voyager",
];
const SUFFIXES = [
  "", "", "", "", // the first four suffix slots are empty, i.e. no suffix
  "Prime", "Alpha", "Beta", "Gamma", "Delta", "Omega",
  "X", "Z", "Pro", "Max", "Ultra", "Mega", "Super",
  "AI", "Bot", "Agent", "Claw", "Byte", "Node", "Core",
];

/**
 * Builds a deterministic display name of the form `<Adjective><Noun>[Suffix]`.
 *
 * The index is decomposed like a mixed-radix number: the adjective cycles
 * fastest, then the noun, then the suffix.
 *
 * Names are unique for `index < ADJECTIVES.length * NOUNS.length`. Beyond that
 * the suffix slot advances, and because the first four SUFFIXES entries are
 * empty, the first four adjective/noun cycles repeat the same names.
 *
 * @param index Zero-based position of the claw being generated.
 * @returns The composed name.
 */
function generateName(index: number): string {
  const adj = ADJECTIVES[index % ADJECTIVES.length];
  const noun = NOUNS[Math.floor(index / ADJECTIVES.length) % NOUNS.length];
  const suffix =
    SUFFIXES[Math.floor(index / (ADJECTIVES.length * NOUNS.length)) % SUFFIXES.length];
  return suffix ? `${adj}${noun}${suffix}` : `${adj}${noun}`;
}
// ============ Platforms and models ============
// Relative weights used when assigning a platform to a claw (they sum to 100).
const PLATFORMS = [
{ name: "claude-code", weight: 40 },
{ name: "cursor", weight: 25 },
{ name: "copilot", weight: 15 },
{ name: "aider", weight: 10 },
{ name: "continue", weight: 5 },
{ name: "zed", weight: 3 },
{ name: "windsurf", weight: 2 },
];
// Relative weights used when assigning a model to a claw (they sum to 100).
const MODELS = [
{ name: "claude-sonnet-4-6", weight: 35 },
{ name: "claude-opus-4-6", weight: 15 },
{ name: "claude-haiku-4-5", weight: 20 },
{ name: "gpt-4o", weight: 15 },
{ name: "gpt-4-turbo", weight: 8 },
{ name: "gemini-2.0-flash", weight: 5 },
{ name: "deepseek-v3", weight: 2 },
];
/**
 * Picks one item at random, with probability proportional to its `weight`.
 *
 * Items whose weight is zero are never selected as long as at least one item
 * has a positive weight.
 *
 * @param items Non-empty list of candidates; weights are expected to be >= 0.
 * @returns The selected item.
 * @throws Error if `items` is empty.
 */
function weightedRandom<T extends { weight: number }>(items: readonly T[]): T {
  if (items.length === 0) {
    throw new Error("weightedRandom: items must not be empty");
  }

  const totalWeight = items.reduce((sum, item) => sum + item.weight, 0);
  let remaining = Math.random() * totalWeight;

  for (const item of items) {
    // Strict comparison before subtracting: a draw of exactly 0 must not land
    // on a leading zero-weight item.
    if (remaining < item.weight) return item;
    remaining -= item.weight;
  }

  // Only reachable through floating-point rounding or when every weight is
  // zero; prefer the last item that actually carries weight.
  for (let i = items.length - 1; i >= 0; i--) {
    if (items[i].weight > 0) return items[i];
  }
  return items[items.length - 1];
}
// ============ Token consumption simulation ============
// Claws are bucketed by how active they are.
type ActivityLevel = "heavy" | "moderate" | "light" | "minimal";

// Relative share of claws in each bucket.
const ACTIVITY_DISTRIBUTION: Array<{ level: ActivityLevel; weight: number }> = [
  { level: "heavy", weight: 10 }, // heavy users
  { level: "moderate", weight: 30 }, // moderate users
  { level: "light", weight: 40 }, // light users
  { level: "minimal", weight: 20 }, // rarely active users
];

// Daily token bounds per bucket, stored as [min, max].
const TOKEN_RANGE_BY_LEVEL: Record<ActivityLevel, readonly [number, number]> = {
  heavy: [500_000, 5_000_000], // 500K - 5M tokens/day
  moderate: [100_000, 500_000], // 100K - 500K tokens/day
  light: [20_000, 100_000], // 20K - 100K tokens/day
  minimal: [1_000, 20_000], // 1K - 20K tokens/day
};

/**
 * Returns the daily token bounds for an activity level.
 *
 * @param level Activity bucket of the claw.
 * @returns A fresh `{ min, max }` object (tokens per day).
 */
function getTokenRange(level: ActivityLevel): { min: number; max: number } {
  const [min, max] = TOKEN_RANGE_BY_LEVEL[level];
  return { min, max };
}
// ============ Task summary templates ============
// Pool of canned task descriptions; each generated task gets one at random.
const TASK_SUMMARIES = [
"Refactored authentication module for better security",
"Implemented new API endpoints for user management",
"Fixed memory leak in background worker process",
"Added unit tests for payment processing module",
"Optimized database queries for faster response times",
"Integrated third-party OAuth provider",
"Migrated legacy code to TypeScript",
"Built real-time notification system with WebSockets",
"Created responsive dashboard UI components",
"Implemented caching layer with Redis",
"Fixed cross-browser compatibility issues",
"Added internationalization support for 5 languages",
"Refactored state management with Redux",
"Implemented file upload with progress tracking",
"Built automated deployment pipeline",
"Created API documentation with OpenAPI spec",
"Implemented rate limiting for API endpoints",
"Fixed race condition in concurrent processing",
"Added logging and monitoring infrastructure",
"Optimized bundle size by 40%",
"Implemented dark mode theme support",
"Built search functionality with Elasticsearch",
"Created admin panel for content management",
"Fixed security vulnerability in input validation",
"Implemented webhook system for integrations",
"Added two-factor authentication support",
"Built data export functionality",
"Implemented pagination for large datasets",
"Created custom CLI tool for development",
"Fixed timezone handling in date operations",
"Implemented email notification system",
"Built real-time collaboration features",
"Added support for multiple payment gateways",
"Created automated testing suite",
"Implemented feature flags system",
"Built analytics dashboard with charts",
"Fixed memory issues in image processing",
"Added support for large file uploads",
"Implemented user preference system",
"Created backup and restore functionality",
"Built custom form builder component",
"Implemented audit logging system",
"Added support for custom themes",
"Created import/export data functionality",
"Implemented session management",
"Built notification preferences UI",
"Added keyboard shortcuts support",
"Implemented drag-and-drop functionality",
"Created custom error handling middleware",
];
// Pool of tool-name combinations; each generated task gets one list at random.
// The same inner array instance is shared by every task that picks it.
const TOOLS_USED = [
["Read", "Edit", "Bash"],
["Grep", "Read", "Write"],
["Bash", "Glob", "Grep"],
["Read", "Edit", "Grep", "Bash"],
["WebSearch", "Read", "Edit"],
["Agent", "Read", "Grep"],
["Bash", "Read", "Write"],
["Glob", "Grep", "Read"],
["Read", "Edit"],
["Bash", "Read"],
];
// ============ Utility functions ============
/**
 * Draws a random integer from the closed interval [min, max].
 *
 * @param min Lower bound (inclusive).
 * @param max Upper bound (inclusive).
 * @returns An integer between `min` and `max` when both bounds are integers.
 */
function randomInRange(min: number, max: number): number {
  const span = max - min + 1;
  const offset = Math.floor(Math.random() * span);
  return offset + min;
}
/**
 * Draws a random floating-point number from the half-open interval [min, max).
 *
 * @param min Lower bound (inclusive).
 * @param max Upper bound (exclusive).
 */
function randomFloat(min: number, max: number): number {
  const width = max - min;
  const scaled = Math.random() * width;
  return scaled + min;
}
/**
 * Picks a random instant between two dates.
 *
 * @param start Earliest possible instant.
 * @param end Latest possible instant.
 * @returns A new Date; neither argument is modified.
 */
function randomDate(start: Date, end: Date): Date {
  const startMs = start.getTime();
  const spanMs = end.getTime() - start.getTime();
  return new Date(startMs + Math.random() * spanMs);
}
/**
 * Formats a date as the calendar-day part of its ISO 8601 string.
 *
 * The day is taken from `toISOString()`, i.e. it is the UTC date, which can
 * differ from the local calendar date.
 *
 * @param date Date to format.
 * @returns Everything before the "T" of the ISO string, e.g. "2026-03-16".
 */
function formatDateString(date: Date): string {
  const iso = date.toISOString();
  const separatorAt = iso.indexOf("T");
  return iso.substring(0, separatorAt);
}
/**
 * Produces a fake IPv4 address whose first octet depends on the country code.
 *
 * Country codes without an entry in the table below get the "10." prefix.
 * The remaining three octets are random values in 1..255.
 *
 * @param countryCode Country code of the claw (e.g. "US").
 * @param index Position of the claw; currently not used by the implementation.
 * @returns A dotted-quad string such as "8.12.200.7".
 */
function generateFakeIp(countryCode: string, index: number): string {
  const firstOctetByCountry: Record<string, string> = {
    US: "8.", CN: "58.", JP: "126.", GB: "2.", DE: "5.",
    FR: "46.", KR: "14.", IN: "59.", CA: "24.", AU: "1.",
    BR: "177.", NL: "77.", SE: "78.", SG: "203.", HK: "218.",
    TW: "61.", IT: "93.", ES: "88.", PL: "79.", ZA: "41.",
  };
  const head = firstOctetByCountry[countryCode] || "10.";
  const tail = [
    randomInRange(1, 255),
    randomInRange(1, 255),
    randomInRange(1, 255),
  ];
  return head + tail.join(".");
}
// ============ Main generation logic ============
// In-memory representation of one generated claw, built before anything is
// written to the database.
interface MockClaw {
// Primary key; generated with nanoid.
id: string;
// Generated key with the "oc_" prefix.
apiKey: string;
// Display name produced by generateName().
name: string;
// Platform name drawn from PLATFORMS.
platform: string;
// Model name drawn from MODELS; also copied onto each generated task.
model: string;
// Fake IPv4 address; also copied onto each generated heartbeat.
ip: string;
// City coordinates plus a small random offset. Kept as numbers here and
// converted to strings when the row is inserted.
latitude: number;
longitude: number;
// City name from CITIES_DATA.
city: string;
// Full country name (or the bare code if no name is known).
country: string;
// Country code as listed in REGION_DISTRIBUTION.
countryCode: string;
// Region name from REGION_DISTRIBUTION.
region: string;
// Random registration time inside the look-back window.
createdAt: Date;
// Drives how many heartbeats, tasks and tokens are generated for this claw.
// It is not part of the row inserted into the claws table.
activityLevel: ActivityLevel;
}
/**
 * Creates a mock API key: the prefix "oc_" followed by 48 random
 * alphanumeric characters.
 *
 * Characters are drawn with Math.random(), so the result is not
 * cryptographically secure.
 *
 * @returns A 51-character key string.
 */
function generateApiKey(): string {
  const alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789";
  const body = Array.from({ length: 48 }, () =>
    alphabet.charAt(Math.floor(Math.random() * alphabet.length))
  );
  return `oc_${body.join("")}`;
}
/**
 * Generates the complete mock data set and writes it to the database.
 *
 * Steps:
 *  1. Build TOTAL_CLAWS claws in memory (region, country, city, platform,
 *     model, activity level, creation time within the last DAYS_BACK days).
 *  2. Insert the claws.
 *  3. Insert heartbeats spread between each claw's creation time and now.
 *  4. Insert tasks spread over the same interval.
 *  5. Write each claw's task count back to `claws.totalTasks`.
 *  6. Insert per-day token usage for the days since each claw was created.
 *  7. Print a summary to stdout.
 *
 * The writes are not wrapped in a transaction and nothing is deleted first,
 * so a failure part-way leaves the rows written so far, and every successful
 * run adds a new set of rows.
 *
 * @returns A promise that resolves once all rows are written and the summary
 *          is printed; it rejects with whatever error a database call throws.
 */
async function generateMockData() {
  console.log("🦀 开始生成模拟数据...\n");

  const DAY_MS = 24 * 60 * 60 * 1000;
  const now = new Date();
  const startDate = new Date(now.getTime() - DAYS_BACK * DAY_MS);

  // Uniform pick from a list.
  function pickRandom<T>(items: readonly T[]): T {
    return items[Math.floor(Math.random() * items.length)];
  }

  // Per-activity-level tuning, stored as inclusive [min, max] counts.
  const heartbeatRanges: Record<ActivityLevel, readonly [number, number]> = {
    heavy: [50, 200], // high-frequency heartbeats
    moderate: [20, 50],
    light: [5, 20],
    minimal: [1, 5],
  };
  const taskRanges: Record<ActivityLevel, readonly [number, number]> = {
    heavy: [20, 100],
    moderate: [10, 30],
    light: [3, 15],
    minimal: [0, 5],
  };
  // Probability that a claw has any token usage on a given day.
  const dailyActiveProbability: Record<ActivityLevel, number> = {
    heavy: 0.95,
    moderate: 0.7,
    light: 0.4,
    minimal: 0.15,
  };

  // 1. Generate all claws in memory.
  const mockClaws: MockClaw[] = [];
  const regionCounts: Record<string, number> = {};
  for (let i = 0; i < TOTAL_CLAWS; i++) {
    // Sample the region entry directly so its country list is at hand.
    const regionData = weightedRandom(REGION_DISTRIBUTION);
    regionCounts[regionData.region] = (regionCounts[regionData.region] || 0) + 1;

    const country = pickRandom(regionData.countries);
    const cities = CITIES_DATA[country] || [{ name: "Unknown", lat: 0, lng: 0 }];
    const city = pickRandom(cities);

    // Random offset so claws in the same city do not share one exact point.
    const latOffset = randomFloat(-0.1, 0.1);
    const lngOffset = randomFloat(-0.1, 0.1);

    const platform = weightedRandom(PLATFORMS);
    const model = weightedRandom(MODELS);
    const activity = weightedRandom(ACTIVITY_DISTRIBUTION);
    const createdAt = randomDate(startDate, now);

    mockClaws.push({
      id: nanoid(21),
      apiKey: generateApiKey(),
      name: generateName(i),
      platform: platform.name,
      model: model.name,
      ip: generateFakeIp(country, i),
      latitude: city.lat + latOffset,
      longitude: city.lng + lngOffset,
      city: city.name,
      country: COUNTRY_NAMES[country] || country,
      countryCode: country,
      region: regionData.region,
      createdAt,
      activityLevel: activity.level,
    });
  }

  console.log("📊 地区分布:");
  for (const [region, count] of Object.entries(regionCounts)) {
    console.log(` ${region}: ${count} 只虾`);
  }
  console.log("");

  // Heartbeats are built in memory before the claws are inserted so that each
  // claw row can carry the timestamp of its most recent heartbeat instead of
  // its creation time.
  const heartbeatBatch: { clawId: string; ip: string; timestamp: Date }[] = [];
  const latestHeartbeatByClaw = new Map<string, Date>();
  for (const claw of mockClaws) {
    const [minBeats, maxBeats] = heartbeatRanges[claw.activityLevel];
    const heartbeatCount = randomInRange(minBeats, maxBeats);
    let latest = claw.createdAt;
    // Spread heartbeats randomly between registration and now.
    for (let h = 0; h < heartbeatCount; h++) {
      const heartbeatTime = randomDate(claw.createdAt, now);
      if (heartbeatTime.getTime() > latest.getTime()) latest = heartbeatTime;
      heartbeatBatch.push({
        clawId: claw.id,
        ip: claw.ip,
        timestamp: heartbeatTime,
      });
    }
    latestHeartbeatByClaw.set(claw.id, latest);
  }

  // 2. Insert claws (batched, like the other tables).
  console.log("💾 插入 claws 数据...");
  const clawRows = mockClaws.map((claw) => ({
    id: claw.id,
    apiKey: claw.apiKey,
    name: claw.name,
    platform: claw.platform,
    model: claw.model,
    ip: claw.ip,
    latitude: String(claw.latitude),
    longitude: String(claw.longitude),
    city: claw.city,
    country: claw.country,
    countryCode: claw.countryCode,
    region: claw.region,
    lastHeartbeat: latestHeartbeatByClaw.get(claw.id) ?? claw.createdAt,
    totalTasks: 0,
    createdAt: claw.createdAt,
    updatedAt: claw.createdAt,
  }));
  const CLAW_BATCH_SIZE = 500;
  for (let i = 0; i < clawRows.length; i += CLAW_BATCH_SIZE) {
    await db.insert(claws).values(clawRows.slice(i, i + CLAW_BATCH_SIZE));
  }
  console.log(` ✅ 已插入 ${mockClaws.length} 只虾\n`);

  // 3. Insert heartbeats.
  console.log("💓 生成心跳数据...");
  const HEARTBEAT_BATCH_SIZE = 1000;
  for (let i = 0; i < heartbeatBatch.length; i += HEARTBEAT_BATCH_SIZE) {
    const batch = heartbeatBatch.slice(i, i + HEARTBEAT_BATCH_SIZE);
    await db.insert(heartbeats).values(batch);
  }
  console.log(` ✅ 已插入 ${heartbeatBatch.length} 条心跳记录\n`);

  // 4. Generate and insert tasks.
  console.log("📋 生成任务数据...");
  const taskBatch: {
    clawId: string;
    summary: string;
    durationMs: number;
    model: string;
    toolsUsed: string[];
    timestamp: Date;
  }[] = [];
  // Tallied while generating so step 5 does not have to rescan taskBatch.
  const taskCountByClaw = new Map<string, number>();
  for (const claw of mockClaws) {
    const [minTasks, maxTasks] = taskRanges[claw.activityLevel];
    const taskCount = randomInRange(minTasks, maxTasks);
    taskCountByClaw.set(claw.id, taskCount);
    for (let t = 0; t < taskCount; t++) {
      const summary = pickRandom(TASK_SUMMARIES);
      const durationMs = randomInRange(10_000, 3_600_000); // 10 seconds - 1 hour
      const toolsUsed = pickRandom(TOOLS_USED);
      const taskTime = randomDate(claw.createdAt, now);
      taskBatch.push({
        clawId: claw.id,
        summary,
        durationMs,
        model: claw.model,
        toolsUsed,
        timestamp: taskTime,
      });
    }
  }
  const TASK_BATCH_SIZE = 500;
  for (let i = 0; i < taskBatch.length; i += TASK_BATCH_SIZE) {
    const batch = taskBatch.slice(i, i + TASK_BATCH_SIZE);
    await db.insert(tasks).values(batch);
  }
  console.log(` ✅ 已插入 ${taskBatch.length} 条任务记录\n`);

  // 5. Write task counts back to the claws (rows were inserted with 0).
  console.log("🔄 更新任务统计...");
  for (const claw of mockClaws) {
    const taskTotal = taskCountByClaw.get(claw.id) ?? 0;
    if (taskTotal > 0) {
      await db
        .update(claws)
        .set({ totalTasks: taskTotal })
        .where(sql`id = ${claw.id}`);
    }
  }
  console.log(" ✅ 已更新任务统计\n");

  // 6. Generate per-day token usage.
  console.log("🔢 生成 Token 使用数据...");
  // dates[0] is today, dates[1] yesterday, and so on (UTC calendar days).
  const dates: string[] = [];
  for (let d = 0; d < DAYS_BACK; d++) {
    dates.push(formatDateString(new Date(now.getTime() - d * DAY_MS)));
  }
  const tokenBatch: {
    clawId: string;
    date: string;
    inputTokens: number;
    outputTokens: number;
  }[] = [];
  for (const claw of mockClaws) {
    // Number of days (rounded up) since this claw was created.
    const activeDays = Math.ceil((now.getTime() - claw.createdAt.getTime()) / DAY_MS);
    const tokenRange = getTokenRange(claw.activityLevel);
    // Only the most recent days, i.e. those after registration, get usage rows.
    for (let d = 0; d < Math.min(activeDays, DAYS_BACK); d++) {
      const isActiveToday = Math.random() < dailyActiveProbability[claw.activityLevel];
      if (!isActiveToday) continue;

      // Scale both bounds by the same factor so daily volume fluctuates.
      const variance = randomFloat(0.5, 1.5);
      const dailyTokens = randomInRange(
        Math.floor(tokenRange.min * variance),
        Math.floor(tokenRange.max * variance)
      );
      // Input/output split is roughly 3:1.
      const inputTokens = Math.floor(dailyTokens * 0.75);
      const outputTokens = dailyTokens - inputTokens;
      tokenBatch.push({
        clawId: claw.id,
        date: dates[d],
        inputTokens,
        outputTokens,
      });
    }
  }
  const TOKEN_BATCH_SIZE = 500;
  for (let i = 0; i < tokenBatch.length; i += TOKEN_BATCH_SIZE) {
    const batch = tokenBatch.slice(i, i + TOKEN_BATCH_SIZE);
    await db.insert(tokenUsage).values(batch);
  }
  console.log(` ✅ 已插入 ${tokenBatch.length} 条 Token 使用记录\n`);

  // 7. Print the summary.
  console.log("=".repeat(50));
  console.log("📊 数据生成完成!\n");
  console.log("📈 统计摘要:");
  console.log(` 总虾数: ${mockClaws.length}`);
  console.log(` 心跳记录: ${heartbeatBatch.length}`);
  console.log(` 任务记录: ${taskBatch.length}`);
  console.log(` Token 记录: ${tokenBatch.length}`);

  const totalInputTokens = tokenBatch.reduce((sum, t) => sum + t.inputTokens, 0);
  const totalOutputTokens = tokenBatch.reduce((sum, t) => sum + t.outputTokens, 0);
  console.log(` 总 Input Tokens: ${(totalInputTokens / 1_000_000).toFixed(2)}M`);
  console.log(` 总 Output Tokens: ${(totalOutputTokens / 1_000_000).toFixed(2)}M`);
  console.log(` 总 Tokens: ${((totalInputTokens + totalOutputTokens) / 1_000_000).toFixed(2)}M`);

  console.log("\n🗺 地区分布:");
  for (const [region, count] of Object.entries(regionCounts).sort((a, b) => b[1] - a[1])) {
    const percentage = ((count / TOTAL_CLAWS) * 100).toFixed(1);
    console.log(` ${region}: ${count} (${percentage}%)`);
  }

  console.log("\n🎯 活跃程度分布:");
  const activityCounts: Record<ActivityLevel, number> = {
    heavy: 0,
    moderate: 0,
    light: 0,
    minimal: 0,
  };
  for (const claw of mockClaws) {
    activityCounts[claw.activityLevel]++;
  }
  console.log(` 重度用户: ${activityCounts.heavy}`);
  console.log(` 中度用户: ${activityCounts.moderate}`);
  console.log(` 轻度用户: ${activityCounts.light}`);
  console.log(` 极少用户: ${activityCounts.minimal}`);
  console.log("\n✨ 完成!");
}
// Entry point: run the generator, then exit with status 0 on success or
// log the error and exit with status 1 on failure.
void (async () => {
  try {
    await generateMockData();
    process.exit(0);
  } catch (err) {
    console.error("❌ 错误:", err);
    process.exit(1);
  }
})();