<!-- File-viewer listing header (not part of the page): 383 lines · 16 KiB · HTML -->
<!DOCTYPE html>
|
|
<html lang="en">
|
|
<head>
|
|
<meta charset="UTF-8"/>
|
|
<meta name="viewport" content="width=device-width,initial-scale=1"/>
|
|
<title>SiliconPin Spider</title>
|
|
<style>
/*
 * Dashboard stylesheet: two-column dark layout (sidebar + live log panel).
 * Rules are kept one per line, grouped by the section banners below.
 */
*{box-sizing:border-box;margin:0;padding:0}
body{font-family:'Segoe UI',system-ui,sans-serif;background:#0d1117;color:#c9d1d9;min-height:100vh}
a{color:#58a6ff;text-decoration:none}

/* ── layout ─────────────────────────── */
.layout{display:grid;grid-template-columns:320px 1fr;height:100vh;overflow:hidden}
.sidebar{background:#161b22;border-right:1px solid #30363d;display:flex;flex-direction:column;overflow:hidden}
.main{display:flex;flex-direction:column;overflow:hidden}

/* ── sidebar header ─────────────────── */
.sidebar-header{padding:16px;border-bottom:1px solid #30363d;flex-shrink:0}
.sidebar-header h1{font-size:1.1rem;color:#58a6ff;display:flex;align-items:center;gap:6px}
.sidebar-header p{font-size:.75rem;color:#8b949e;margin-top:3px}

/* ── add domain form ────────────────── */
.add-form{padding:12px 16px;border-bottom:1px solid #30363d;flex-shrink:0}
.add-form .row{display:flex;gap:6px;margin-bottom:8px}
/* outline is removed here; the :focus rule below supplies the visible focus cue */
.add-form input{flex:1;padding:6px 10px;background:#0d1117;border:1px solid #30363d;border-radius:6px;color:#c9d1d9;font-size:.82rem;outline:none}
.add-form input:focus{border-color:#58a6ff}
.add-form input.narrow{max-width:80px;flex:none}
.btn{padding:6px 14px;border:none;border-radius:6px;cursor:pointer;font-size:.82rem;font-weight:600;transition:opacity .15s}
.btn:hover{opacity:.85}
.btn-green{background:#238636;color:#fff}
.btn-gray{background:#30363d;color:#c9d1d9}
.btn-yellow{background:#9e6a03;color:#fff}
.btn-blue{background:#1f6feb;color:#fff}
.add-result{font-size:.75rem;color:#8b949e;min-height:16px}

/* ── domain list ────────────────────── */
.domain-list{flex:1;overflow-y:auto;padding:8px 0}
.domain-card{padding:10px 16px;cursor:pointer;border-left:3px solid transparent;transition:background .12s}
.domain-card:hover{background:#1c2128}
.domain-card.active{background:#1c2128;border-left-color:#58a6ff}
.domain-card .dc-name{font-size:.88rem;font-weight:600;color:#e6edf3;display:flex;align-items:center;gap:6px}
.domain-card .dc-meta{font-size:.72rem;color:#8b949e;margin-top:3px;display:flex;gap:10px}
.domain-card .dc-parent{font-size:.7rem;color:#6e7681;margin-top:2px}

/* ── status badge (class suffix = domain status) ── */
.badge{font-size:.65rem;padding:1px 7px;border-radius:10px;font-weight:700;white-space:nowrap}
.b-running {background:#0d4429;color:#3fb950}
.b-paused {background:#3d2e00;color:#d29922}
.b-done {background:#0d2040;color:#58a6ff}
.b-pending {background:#282d33;color:#8b949e}

/* ── main area ──────────────────────── */
.main-header{padding:12px 20px;border-bottom:1px solid #30363d;display:flex;align-items:center;gap:12px;flex-shrink:0;background:#161b22}
.main-header h2{font-size:1rem;color:#e6edf3;flex:1}
.conn-dot{width:9px;height:9px;border-radius:50%;background:#3fb950;flex-shrink:0}
.conn-dot.off{background:#484f58}
.conn-dot.pulse{animation:pulse 1.5s infinite}
@keyframes pulse{0%,100%{opacity:1}50%{opacity:.3}}
/* honour the user's reduced-motion preference: keep the dot solid instead of blinking */
@media (prefers-reduced-motion:reduce){.conn-dot.pulse{animation:none}}

.log-wrap{flex:1;overflow-y:auto;padding:10px 16px;font-family:'Cascadia Code','Fira Code',monospace;font-size:.75rem}
.ev{display:flex;gap:8px;padding:3px 0;border-bottom:1px solid #161b22;align-items:flex-start}
.ev-badge{font-size:.67rem;padding:1px 7px;border-radius:10px;font-weight:700;white-space:nowrap;flex-shrink:0;margin-top:1px}
.ev-body{word-break:break-all;color:#8b949e}
.ev-body b{color:#c9d1d9}

/* event colours (class suffix = event name) */
.e-connected {background:#0d2040;color:#58a6ff}
.e-status {background:#1c2128;color:#8b949e}
.e-robots {background:#3d2200;color:#d29922}
.e-waiting {background:#0d2e1a;color:#3fb950}
.e-fetching {background:#0d2040;color:#79c0ff}
.e-saved {background:#0d2e1a;color:#3fb950}
.e-links_found {background:#112040;color:#a5d6ff}
.e-skipped {background:#2d2600;color:#d29922}
.e-error {background:#3d0000;color:#f85149}
.e-done {background:#0d2040;color:#79c0ff}
.e-paused {background:#3d2e00;color:#d29922}
.e-resumed {background:#0d2e1a;color:#3fb950}
.e-new_domain {background:#1f1640;color:#d2a8ff}
.e-shutdown {background:#2d0000;color:#f85149}

.empty{color:#484f58;padding:20px;text-align:center;font-size:.82rem}
.stats-bar{display:flex;gap:16px;padding:6px 20px;background:#0d1117;border-bottom:1px solid #21262d;font-size:.73rem;color:#8b949e;flex-shrink:0}
.stats-bar span b{color:#c9d1d9}

/* ── no-domain placeholder ──────────── */
.placeholder{flex:1;display:flex;align-items:center;justify-content:center;flex-direction:column;gap:10px;color:#484f58}
.placeholder svg{opacity:.3}
</style>
|
|
</head>
|
|
<body>
|
|
<div class="layout">
|
|
|
|
<!-- ═══════════════ SIDEBAR ═══════════════ -->
|
|
<!-- Sidebar: title, "add domain" form, and the domain list that renderList() fills in. -->
<aside class="sidebar" aria-label="Domains">
  <div class="sidebar-header">
    <h1>🕷 SiliconPin Spider</h1>
    <p>Polite web crawler · robots.txt · SSE live</p>
  </div>

  <div class="add-form">
    <div class="row">
      <!-- aria-label gives each field an accessible name; the placeholder alone is not a label -->
      <input id="inp-domain" placeholder="domain.com" autocomplete="off" aria-label="Domain to crawl"/>
      <input id="inp-delay" placeholder="delay s" class="narrow" value="20" inputmode="numeric" aria-label="Crawl delay in seconds"/>
    </div>
    <div class="row">
      <button type="button" class="btn btn-green" onclick="addDomain()">+ Add &amp; Crawl</button>
      <button type="button" class="btn btn-gray" onclick="refreshList()">↻ Refresh</button>
    </div>
    <!-- role="status" makes the success/error text written by showResult() a polite live region -->
    <div class="add-result" id="add-result" role="status"></div>
  </div>

  <div class="domain-list" id="domain-list">
    <div class="empty">No domains yet</div>
  </div>
</aside>
|
|
|
|
<!-- ═══════════════ MAIN ═══════════════ -->
|
|
<!-- Main column: a placeholder until a domain is selected, then the live panel (header, stats, log). -->
<main class="main">
  <!-- Hidden by selectDomain() via style.display; layout comes from the .placeholder class -->
  <div id="main-placeholder" class="placeholder">
    <svg width="64" height="64" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="1" aria-hidden="true">
      <circle cx="12" cy="12" r="10"/><line x1="12" y1="8" x2="12" y2="12"/>
      <line x1="12" y1="16" x2="12.01" y2="16"/>
    </svg>
    <p>Select a domain to watch its live feed</p>
  </div>

  <!-- Starts hidden; selectDomain() switches display to flex. The remaining inline rules size the panel. -->
  <div id="main-panel" style="display:none;flex-direction:column;flex:1;overflow:hidden">
    <div class="main-header">
      <!-- Connection indicator; setDot() rewrites its class list -->
      <span class="conn-dot off" id="conn-dot"></span>
      <h2 id="panel-title">—</h2>
      <button type="button" class="btn btn-yellow" id="btn-pause" onclick="pauseDomain()">⏸ Pause</button>
      <button type="button" class="btn btn-blue" id="btn-resume" onclick="resumeDomain()" style="display:none">▶ Resume</button>
      <button type="button" class="btn btn-gray" onclick="clearLog()">✕ Clear</button>
    </div>
    <div class="stats-bar" id="stats-bar">
      <span>URLs: <b id="stat-urls">—</b></span>
      <span>Queue: <b id="stat-queue">—</b></span>
      <span>Status: <b id="stat-status">—</b></span>
      <span>Interval: <b id="stat-interval">—</b>s</span>
    </div>
    <!-- Event rows are appended here by appendLog() -->
    <div class="log-wrap" id="log" role="log" aria-label="Crawl events"></div>
  </div>
</main>
|
|
|
|
</div>
|
|
|
|
<script>
|
|
// ── state ──────────────────────────────────────────────────
|
|
// Page-wide mutable state shared by the functions below.
let activeDomain = null,  // domain currently shown in the main panel (set by selectDomain)
    activeSSE    = null,  // EventSource for the selected domain's stream
    globalSSE    = null,  // EventSource for the global '/api/sse/' stream
    domainMap    = {};    // domain → row data from the last /api/domains response
|
|
|
|
// ── init ───────────────────────────────────────────────────
|
|
// Bootstrap once the DOM is parsed: load the list, open the global stream,
// then keep re-polling the list on a fixed interval.
window.addEventListener('DOMContentLoaded', function onReady() {
  const REFRESH_MS = 15000;
  refreshList();
  connectGlobalSSE();
  setInterval(refreshList, REFRESH_MS);
});
|
|
|
|
// ── global SSE (new_domain, shutdown) ──────────────────────
|
|
/**
 * (Re)open the global event stream at '/api/sse/'.
 *
 * Only `new_domain` messages are acted on: the sidebar list is refreshed, and
 * when the new domain's `parent` equals the currently selected domain the
 * event is also written to the visible log.
 *
 * Any previously opened global stream is closed first.
 */
function connectGlobalSSE() {
  if (globalSSE) globalSSE.close();
  globalSSE = new EventSource('/api/sse/');

  globalSSE.onmessage = e => {
    let msg;
    try {
      msg = JSON.parse(e.data);
    } catch (err) {
      // Best-effort stream: skip payloads that are not JSON, but leave a trace.
      console.warn('global SSE: ignoring non-JSON message', err);
      return;
    }
    if (!msg || msg.event !== 'new_domain') return;

    refreshList();

    // A message without `data` simply cannot match the active domain.
    const info = msg.data || {};
    if (activeDomain && info.parent === activeDomain) {
      appendLog({event: 'new_domain', data: info});
    }
  };
}
|
|
|
|
// ── fetch domain list ───────────────────────────────────────
|
|
/**
 * Reload the domain list from GET /api/domains, rebuild `domainMap`,
 * re-render the sidebar and refresh the stats bar for the active domain.
 *
 * This is called without `await` from timers and event handlers, so it never
 * rejects: on a network error, a non-2xx response or an unparsable body the
 * current UI is left untouched.
 *
 * @returns {Promise<void>}
 */
async function refreshList() {
  let list;
  try {
    const res = await fetch('/api/domains');
    if (!res.ok) return;
    list = await res.json();
  } catch (err) {
    console.warn('refreshList: could not load /api/domains', err);
    return;
  }

  // A body of `null` (or any non-array) is treated as "no domains".
  if (!Array.isArray(list)) list = [];

  domainMap = {};
  list.forEach(d => { domainMap[d.domain] = d; });
  renderList(list);

  if (activeDomain && domainMap[activeDomain]) updateStats(domainMap[activeDomain]);
}
|
|
|
|
/**
 * Render the sidebar cards for the given domain rows.
 *
 * Cards are built with DOM APIs (textContent / addEventListener) rather than
 * an HTML string, so field values are never parsed as markup or script.
 * Each card keeps the id `dc-<domain>` that selectDomain() looks up.
 *
 * @param {Array<Object>} list rows with the fields read below: domain, status,
 *        url_count, queue_len, interval and optional parent.
 */
function renderList(list) {
  const el = document.getElementById('domain-list');
  if (!list || list.length === 0) {
    el.innerHTML = '<div class="empty">No domains yet</div>';
    return;
  }

  // Element factory: tag + optional class + optional text content.
  const make = (tag, cls, text) => {
    const node = document.createElement(tag);
    if (cls) node.className = cls;
    if (text !== undefined) node.textContent = text;
    return node;
  };

  const cards = list.map(d => {
    const card = make('div', 'domain-card' + (d.domain === activeDomain ? ' active' : ''));
    card.id = 'dc-' + d.domain;
    card.addEventListener('click', () => selectDomain(d.domain));

    const name = make('div', 'dc-name');
    name.append(
      make('span', '', String(d.domain)),
      make('span', 'badge b-' + d.status, String(d.status))
    );

    const meta = make('div', 'dc-meta');
    meta.append(
      make('span', '', `✓ ${d.url_count} urls`),
      make('span', '', `⏳ ${d.queue_len} queued`),
      make('span', '', `⏱ ${d.interval}s`)
    );

    card.append(name, meta);
    if (d.parent) card.append(make('div', 'dc-parent', `↳ from ${d.parent}`));
    return card;
  });

  el.replaceChildren(...cards);
}
|
|
|
|
// ── select domain → connect SSE ────────────────────────────
|
|
/**
 * Make `domain` the active one: highlight its sidebar card, show the main
 * panel with a cleared log and current stats, and switch the per-domain
 * event stream to '/api/sse/<domain>'.
 *
 * Selecting the already-active domain is a no-op.
 *
 * @param {string} domain domain name as listed in the sidebar
 */
function selectDomain(domain) {
  if (activeDomain === domain) return;
  activeDomain = domain;

  // highlight sidebar
  document.querySelectorAll('.domain-card').forEach(c => c.classList.remove('active'));
  const card = document.getElementById('dc-' + domain);
  if (card) card.classList.add('active');

  // show panel
  document.getElementById('main-placeholder').style.display = 'none';
  document.getElementById('main-panel').style.display = 'flex';
  document.getElementById('panel-title').textContent = domain;
  clearLog();

  // update stats
  if (domainMap[domain]) updateStats(domainMap[domain]);

  // SSE: drop the previous stream before opening the new one
  if (activeSSE) activeSSE.close();
  setDot(true);
  // Encode the domain so characters such as '/', '?' or '#' cannot alter the request path.
  activeSSE = new EventSource('/api/sse/' + encodeURIComponent(domain));
  // EventSource reconnects on its own after an error; turn the dot back on when it does.
  activeSSE.onopen = () => setDot(true);
  activeSSE.onmessage = e => {
    let msg;
    try {
      msg = JSON.parse(e.data);
    } catch (_) {
      return; // best-effort: skip payloads that are not JSON
    }
    appendLog(msg);
  };
  activeSSE.onerror = () => setDot(false);
}
|
|
|
|
/**
 * Copy one domain row into the stats bar and show whichever of the
 * Pause / Resume buttons applies to its status.
 *
 * @param {Object} d row with url_count, queue_len, status and interval
 */
function updateStats(d) {
  const setText = (id, value) => {
    document.getElementById(id).textContent = value;
  };
  setText('stat-urls', d.url_count);
  setText('stat-queue', d.queue_len);
  setText('stat-status', d.status);
  setText('stat-interval', d.interval);

  // Exactly one of the two buttons is visible at a time.
  const isPaused = d.status === 'paused';
  const pauseBtn = document.getElementById('btn-pause');
  const resumeBtn = document.getElementById('btn-resume');
  pauseBtn.style.display = isPaused ? 'none' : '';
  resumeBtn.style.display = isPaused ? '' : 'none';
}
|
|
|
|
// ── log rendering ───────────────────────────────────────────
|
|
/**
 * Append one event to the log panel and apply its side effects to the stats
 * bar (URL counter, queue length, status, Pause/Resume buttons).
 *
 * @param {{event?: string, data?: (Object|string)}} obj decoded SSE message;
 *        a missing event is shown as 'status', missing data as {}.
 */
function appendLog(obj) {
  const event = obj.event || 'status';
  const data  = obj.data  || {};
  const row   = domainMap[activeDomain];

  // update stats inline from events
  if (row && event === 'saved') {
    row.url_count++;
    document.getElementById('stat-urls').textContent = row.url_count;
  }
  if (row && (event === 'saved' || event === 'links_found' || event === 'waiting')) {
    // formatBody() reads `queue_len` for links_found and `queue` for waiting; accept either.
    const queued = data.queue_len ?? data.queue;
    if (queued !== undefined && queued !== null) {
      row.queue_len = queued;
      document.getElementById('stat-queue').textContent = row.queue_len;
    }
  }

  if (event === 'paused' || event === 'resumed' || event === 'done') {
    refreshList();
    const st = event === 'paused' ? 'paused' : event === 'done' ? 'done' : 'running';
    document.getElementById('stat-status').textContent = st;
    document.getElementById('btn-pause').style.display  = (event === 'paused') ? 'none' : '';
    document.getElementById('btn-resume').style.display = (event === 'paused') ? '' : 'none';
  }
  if (event === 'new_domain') refreshList();

  const log = document.getElementById('log');
  // Drop the "waiting for events" placeholder on the first real row.
  if (log.querySelector('.empty')) log.innerHTML = '';

  // The event name comes off the wire: set it as text, never as markup.
  const badge = document.createElement('span');
  badge.className = 'ev-badge e-' + event;
  badge.textContent = event;

  // formatBody() returns an HTML fragment and escapes the values it interpolates.
  const body = document.createElement('span');
  body.className = 'ev-body';
  body.innerHTML = formatBody(event, data);

  const div = document.createElement('div');
  div.className = 'ev';
  div.append(badge, body);
  log.appendChild(div);
  log.scrollTop = log.scrollHeight; // keep the newest row in view
}
|
|
|
|
/**
 * Build the HTML fragment shown next to an event badge.
 *
 * Every value taken from `data` is passed through esc() before it is placed
 * in the fragment, including fields that are normally numeric, so the result
 * can be assigned to innerHTML.
 *
 * @param {string} event event name (selects the layout)
 * @param {Object|string} data event payload; a plain string is shown as-is
 * @returns {string} HTML fragment
 */
function formatBody(event, data) {
  if (typeof data === 'string') return esc(data);

  const url = `<b>${esc(data.url || '')}</b>`;
  const bold = v => `<b>${esc(v)}</b>`;

  switch (event) {
    case 'waiting':
      return `${url} — delay <b>${esc(data.delay_s)}s</b>, queue ${bold(data.queue)}`;
    case 'fetching':
      return url;
    case 'saved':
      return `${url} <span style="color:#6e7681">[${esc(data.status)} ${esc(data.content_type || '')}]</span>`;
    case 'links_found':
      return `${url} — found ${bold(data.found)} same-host, ${bold(data.new)} new, ${bold(data.external || 0)} external, queue ${bold(data.queue_len)}`;
    case 'skipped':
      return `${url} — ${esc(data.reason || '')}`;
    case 'error':
      // With a URL: "<url> — <msg>"; without one the message itself is the headline.
      return `<b>${esc(data.url || data.msg || '')}</b>${data.url ? ' — ' + esc(data.msg || '') : ''}`;
    case 'robots':
      return `delay <b>${esc(data.robots_delay)}s</b> → effective <b>${esc(data.effective_delay)}s</b>, disallowed: ${bold((data.disallowed || []).length)}`;
    case 'new_domain':
      return `discovered <b>${esc(data.domain || '')}</b>${data.parent ? ` from <b>${esc(data.parent)}</b>` : ''}`;
    case 'done':
      return `<b>${esc(data.domain || '')}</b> — crawl complete ✓`;
    case 'paused':
      return `crawler paused — send <b>resume</b> to continue`;
    case 'resumed':
      return `crawler resumed`;
    case 'connected':
      return `stream connected for <b>${esc(data.domain || '')}</b>`;
    default:
      // Unknown event: dump the payload verbatim.
      return esc(typeof data === 'object' ? JSON.stringify(data) : String(data));
  }
}
|
|
|
|
// ── add domain ──────────────────────────────────────────────
|
|
/**
 * Submit the "add domain" form: POST {domain, 'Crawl-delay'} as JSON to
 * /api/add_domain, show the outcome under the form and, on success, refresh
 * the list and select the new domain.
 *
 * Network failures and non-JSON responses are reported in the result line
 * instead of surfacing as unhandled promise rejections.
 *
 * @returns {Promise<void>}
 */
async function addDomain() {
  const domainInput = document.getElementById('inp-domain');
  const domain = domainInput.value.trim();
  const delay  = document.getElementById('inp-delay').value.trim();
  if (!domain) { showResult('Domain is required', true); return; }

  let res;
  let data = {};
  try {
    res = await fetch('/api/add_domain', {
      method: 'POST',
      headers: {'Content-Type': 'application/json'},
      body: JSON.stringify({domain, 'Crawl-delay': delay})
    });
    // An error page may not be JSON; fall back to an empty payload.
    data = (await res.json().catch(() => null)) || {};
  } catch (err) {
    showResult(`✗ ${(err && err.message) || 'request failed'}`, true);
    return;
  }

  if (res.ok) {
    showResult(`✓ ${data.message || 'OK'}`, false);
    domainInput.value = '';
    await refreshList();
    // Prefer the name echoed by the server; fall back to what was typed.
    selectDomain(data.domain || domain);
  } else {
    showResult(`✗ ${data.error || res.statusText || 'HTTP ' + res.status}`, true);
  }
}
|
|
|
|
/**
 * Write a one-line outcome under the add-domain form.
 *
 * @param {string} msg text to display
 * @param {boolean} err truthy → red (error), falsy → green (success)
 */
function showResult(msg, err) {
  const line = document.getElementById('add-result');
  line.style.color = err ? '#f85149' : '#3fb950';
  line.textContent = msg;
}
|
|
|
|
// ── pause / resume ──────────────────────────────────────────
|
|
/**
 * Ask the server to pause the active domain (POST /api/pause/<domain>).
 * Does nothing when no domain is selected. The response is not inspected;
 * a network failure is logged rather than left as an unhandled rejection.
 *
 * @returns {Promise<void>}
 */
async function pauseDomain() {
  if (!activeDomain) return;
  try {
    // Encode the domain so it stays a single path segment.
    await fetch('/api/pause/' + encodeURIComponent(activeDomain), {method: 'POST'});
  } catch (err) {
    console.warn('pauseDomain: request failed', err);
  }
}
|
|
|
|
/**
 * Ask the server to resume the active domain (POST /api/resume/<domain>).
 * Does nothing when no domain is selected. The response is not inspected;
 * a network failure is logged rather than left as an unhandled rejection.
 *
 * @returns {Promise<void>}
 */
async function resumeDomain() {
  if (!activeDomain) return;
  try {
    // Encode the domain so it stays a single path segment.
    await fetch('/api/resume/' + encodeURIComponent(activeDomain), {method: 'POST'});
  } catch (err) {
    console.warn('resumeDomain: request failed', err);
  }
}
|
|
|
|
// ── utils ───────────────────────────────────────────────────
|
|
/** Reset the log panel to its "waiting" placeholder row. */
function clearLog() {
  const logEl = document.getElementById('log');
  logEl.innerHTML = '<div class="empty">— waiting for events —</div>';
}
|
|
|
|
/**
 * Switch the connection indicator between its two looks.
 *
 * @param {boolean} live truthy → 'conn-dot pulse', falsy → 'conn-dot off'
 */
function setDot(live) {
  const state = live ? 'pulse' : 'off';
  document.getElementById('conn-dot').className = `conn-dot ${state}`;
}
|
|
|
|
/**
 * Escape a value for interpolation into HTML text or a quoted attribute.
 *
 * The five characters & < > " ' are replaced by entities; everything else is
 * returned unchanged. The input is converted with String() first, so numbers
 * and other non-string values are accepted.
 *
 * @param {*} s value to escape
 * @returns {string} HTML-safe text
 */
function esc(s) {
  const ENTITIES = {
    '&': '&amp;',
    '<': '&lt;',
    '>': '&gt;',
    '"': '&quot;',
    "'": '&#39;'
  };
  // One pass over the string, so '&' inside an emitted entity is never re-escaped.
  return String(s).replace(/[&<>"']/g, ch => ENTITIES[ch]);
}
|
|
|
|
// Keyboard shortcut: pressing Enter while the domain field has focus submits the form.
document.addEventListener('keydown', function onKeyDown(e) {
  if (e.key !== 'Enter') return;
  if (document.activeElement.id !== 'inp-domain') return;
  addDomain();
});
|
|
</script>
|
|
</body>
|
|
</html> |