From 10b19d4ed6d3335160160522afc57204078bc1fe Mon Sep 17 00:00:00 2001 From: "Kar@k5" Date: Thu, 5 Feb 2026 19:13:45 +0530 Subject: [PATCH] init --- Dockerfile | 38 +++ Makefile | 61 ++++ PROJECT_OVERVIEW.md | 447 +++++++++++++++++++++++++++ QUICKSTART.md | 152 ++++++++++ README.md | 213 +++++++++++++ crawler.go | 287 +++++++++++++++++ db.go | 253 +++++++++++++++ go.mod | 11 + handler.go | 465 ++++++++++++++++++++++++++++ index.html | 726 ++++++++++++++++++++++++++++++++++++++++++++ main.go | 72 +++++ run.sh | 44 +++ site.go | 59 ++++ 13 files changed, 2828 insertions(+) create mode 100644 Dockerfile create mode 100644 Makefile create mode 100644 PROJECT_OVERVIEW.md create mode 100644 QUICKSTART.md create mode 100644 README.md create mode 100644 crawler.go create mode 100644 db.go create mode 100644 go.mod create mode 100644 handler.go create mode 100644 index.html create mode 100644 main.go create mode 100644 run.sh create mode 100644 site.go diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..7538d37 --- /dev/null +++ b/Dockerfile @@ -0,0 +1,38 @@ +# Build stage +FROM golang:1.21-alpine AS builder + +# Install build dependencies +RUN apk add --no-cache git gcc musl-dev sqlite-dev + +WORKDIR /app + +# Copy go mod files +COPY go.mod go.sum ./ +RUN go mod download + +# Copy source code +COPY . . + +# Build the application +RUN CGO_ENABLED=1 GOOS=linux go build -a -installsuffix cgo -o sitemap-api . + +# Final stage +FROM alpine:latest + +# Install runtime dependencies +RUN apk --no-cache add ca-certificates sqlite-libs + +WORKDIR /root/ + +# Copy binary from builder +COPY --from=builder /app/sitemap-api . 
+COPY --from=builder /app/static ./static + +# Expose port +EXPOSE 8080 + +# Set environment +ENV PORT=8080 + +# Run the application +CMD ["./sitemap-api"] diff --git a/Makefile b/Makefile new file mode 100644 index 0000000..cc380e0 --- /dev/null +++ b/Makefile @@ -0,0 +1,61 @@ +.PHONY: help build run clean test install dev + +help: ## Show this help message + @echo "XML Sitemap Generator API - Make Commands" + @echo "" + @grep -E '^[a-zA-Z_-]+:.*?## .*$$' $(MAKEFILE_LIST) | sort | awk 'BEGIN {FS = ":.*?## "}; {printf "\033[36m%-15s\033[0m %s\n", $$1, $$2}' + +install: ## Install Go dependencies + @echo "πŸ“¦ Installing dependencies..." + @go mod download + @echo "βœ… Dependencies installed" + +build: ## Build the application + @echo "πŸ”¨ Building..." + @go build -o sitemap-api . + @echo "βœ… Build complete: ./sitemap-api" + +run: build ## Build and run the application + @echo "πŸš€ Starting server on http://localhost:8080" + @./sitemap-api + +dev: ## Run in development mode (with hot reload if air is installed) + @if command -v air > /dev/null; then \ + air; \ + else \ + echo "πŸ’‘ Tip: Install 'air' for hot reload: go install github.com/cosmtrek/air@latest"; \ + $(MAKE) run; \ + fi + +clean: ## Clean build artifacts and database + @echo "🧹 Cleaning..." + @rm -f sitemap-api + @rm -f *.db + @rm -f *.db-journal + @echo "βœ… Clean complete" + +test: ## Run tests + @echo "πŸ§ͺ Running tests..." + @go test -v ./... + +format: ## Format code + @echo "πŸ“ Formatting code..." + @go fmt ./... + @echo "βœ… Code formatted" + +lint: ## Run linter (requires golangci-lint) + @echo "πŸ” Running linter..." + @if command -v golangci-lint > /dev/null; then \ + golangci-lint run; \ + else \ + echo "❌ golangci-lint not installed. Install: https://golangci-lint.run/usage/install/"; \ + fi + +docker-build: ## Build Docker image + @echo "🐳 Building Docker image..." + @docker build -t sitemap-api . 
+ @echo "βœ… Docker image built: sitemap-api" + +docker-run: docker-build ## Run in Docker container + @echo "🐳 Running in Docker..." + @docker run -p 8080:8080 sitemap-api diff --git a/PROJECT_OVERVIEW.md b/PROJECT_OVERVIEW.md new file mode 100644 index 0000000..e5a87b7 --- /dev/null +++ b/PROJECT_OVERVIEW.md @@ -0,0 +1,447 @@ +# πŸ—ΊοΈ XML Sitemap Generator - Complete Implementation + +## Project Overview + +A production-ready Go API for generating XML sitemaps with real-time progress tracking. Built with concurrent crawling, SSE streaming, and comprehensive client metadata tracking. + +## ✨ Key Features Implemented + +### 1. **Backend-Generated UUID System** +- Server generates unique UUID for each crawl request +- UUID used for SSE stream connection and file download +- Enables true multi-user support with isolated streams + +### 2. **Server-Sent Events (SSE) Streaming** +- Real-time progress updates via `/stream/{uuid}` +- Event types: `connected`, `started`, `progress`, `complete`, `error` +- Non-blocking concurrent stream management +- Automatic cleanup after completion + +### 3. **Concurrent Web Crawler** +- Goroutine-based parallel crawling +- Configurable concurrency limit (default: 5 parallel requests) +- Depth-limited crawling (1-5 levels) +- Same-domain restriction with URL normalization +- Duplicate detection and prevention + +### 4. **Client Metadata Tracking** +Automatically captured and stored in SQLite: +- IP Address (with X-Forwarded-For support) +- User-Agent string +- Browser name & version (Chrome, Firefox, Safari, Edge, Opera) +- Operating System (Windows, macOS, Linux, Android, iOS) +- Device Type (Desktop, Mobile, Tablet) +- Session ID (cookie-based persistence) +- All cookies (JSON-encoded) +- HTTP Referrer + +### 5. 
**RESTful API Endpoints** +``` +POST /generate-sitemap-xml β†’ Start crawl, returns UUID +GET /stream/{uuid} β†’ SSE progress stream +GET /download/{uuid} β†’ Download XML sitemap +GET /sites β†’ List all sitemaps +GET /sites/{id} β†’ Get specific site +DELETE /sites/{id} β†’ Delete sitemap +GET /health β†’ Health check +GET / β†’ Serve frontend HTML +``` + +### 6. **Beautiful Frontend UI** +- Responsive gradient design +- Real-time progress visualization +- Live connection status indicator +- Crawl statistics (pages found, depth, time) +- Activity log with color-coded entries +- Site management (view, download, delete) +- Auto-protocol addition for URLs + +## πŸ—οΈ Architecture + +``` +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Browser β”‚ +β”‚ (Frontend) β”‚ +β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”˜ + β”‚ POST /generate-sitemap-xml + ↓ +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ Go HTTP Server (Chi Router) β”‚ +β”‚ β”‚ +β”‚ β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”‚ +β”‚ β”‚ Handler (handler.go) β”‚ β”‚ +β”‚ β”‚ - Generate UUID β”‚ β”‚ +β”‚ β”‚ - Extract metadata β”‚ β”‚ +β”‚ β”‚ - Create DB record β”‚ β”‚ +β”‚ β”‚ - Spawn crawler β”‚ β”‚ +β”‚ β”‚ - Return UUID immediatelyβ”‚ β”‚ +β”‚ β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”Όβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ + β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”΄β”€β”€β”€β”€β”€β”€β”€β”€β”€β” + β”‚ β”‚ + ↓ ↓ +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ StreamManagerβ”‚ β”‚ Crawler β”‚ +β”‚ β”‚ β”‚ β”‚ +β”‚ UUID β†’ Chan β”‚ β”‚ Goroutines β”‚ +β”‚ Map storage │←──│ Concurrent β”‚ +β”‚ β”‚ β”‚ HTTP requestsβ”‚ +β””β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”˜ 
β””β”€β”€β”€β”€β”€β”€β”€β”¬β”€β”€β”€β”€β”€β”€β”€β”˜ + β”‚ β”‚ + β”‚ SSE Events β”‚ Save pages + ↓ ↓ +β”Œβ”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β” +β”‚ SQLite Database β”‚ +β”‚ - sites (with metadata) β”‚ +β”‚ - pages (discovered URLs) β”‚ +β”‚ - sessions (tracking) β”‚ +β””β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”€β”˜ +``` + +## πŸ“‚ File Structure + +``` +sitemap-api/ +β”œβ”€β”€ main.go # HTTP server setup, routes +β”œβ”€β”€ go.mod # Go module dependencies +β”œβ”€β”€ go.sum # Dependency checksums +β”‚ +β”œβ”€β”€ handlers/ +β”‚ └── handler.go # All HTTP handlers +β”‚ - GenerateSitemapXML # POST endpoint +β”‚ - StreamSSE # SSE streaming +β”‚ - DownloadSitemap # XML generation +β”‚ - GetSites/GetSite # CRUD operations +β”‚ - DeleteSite # Cleanup +β”‚ - StreamManager # Concurrent stream management +β”‚ +β”œβ”€β”€ crawler/ +β”‚ └── crawler.go # Web crawler implementation +β”‚ - Crawl() # Main crawl logic +β”‚ - crawlURL() # Recursive URL processing +β”‚ - extractLinks() # HTML parsing +β”‚ - normalizeURL() # URL canonicalization +β”‚ - isSameDomain() # Domain checking +β”‚ - calculatePriority() # Sitemap priority +β”‚ +β”œβ”€β”€ database/ +β”‚ └── db.go # SQLite operations +β”‚ - NewDB() # Initialize DB +β”‚ - createTables() # Schema creation +β”‚ - CreateSite() # Insert site record +β”‚ - GetSiteByUUID() # Retrieve by UUID +β”‚ - UpdateSiteStatus() # Mark complete +β”‚ - AddPage() # Save discovered page +β”‚ - GetPagesBySiteID() # Retrieve all pages +β”‚ - DeleteSite() # Cascade delete +β”‚ +β”œβ”€β”€ models/ +β”‚ └── site.go # Data structures +β”‚ - Site # Site record +β”‚ - Page # Page record +β”‚ - Event # SSE event +β”‚ - ProgressData # Progress payload +β”‚ - CompleteData # Completion payload +β”‚ - ErrorData # Error payload +β”‚ +β”œβ”€β”€ static/ +β”‚ └── index.html # Frontend application +β”‚ - SitemapGenerator # Main class +β”‚ - 
generateSitemap() # Initiate crawl +β”‚ - connectToStream() # SSE connection +β”‚ - updateProgress() # Live updates +β”‚ - downloadSitemap() # File download +β”‚ - displaySites() # Results listing +β”‚ +β”œβ”€β”€ README.md # Full documentation +β”œβ”€β”€ QUICKSTART.md # Quick start guide +β”œβ”€β”€ Makefile # Build automation +β”œβ”€β”€ Dockerfile # Container setup +β”œβ”€β”€ run.sh # Startup script +β”œβ”€β”€ .gitignore # Git exclusions +└── .env.example # Environment template +``` + +## πŸ”„ Request Flow + +### 1. Generate Sitemap Request +``` +User fills form β†’ POST /generate-sitemap-xml + ↓ + Server generates UUID + ↓ + Extract IP, UA, cookies, session + ↓ + Save to database (status: processing) + ↓ + Create SSE channel in StreamManager + ↓ + Spawn goroutine for crawler (non-blocking) + ↓ + Return UUID immediately to frontend +``` + +### 2. SSE Stream Connection +``` +Frontend receives UUID β†’ GET /stream/{uuid} + ↓ + StreamManager finds channel + ↓ + Send "connected" event + ↓ + Crawler sends events to channel + ↓ + Handler forwards to browser + ↓ + Frontend updates UI in real-time +``` + +### 3. Crawler Operation +``` +Start from root URL β†’ Fetch HTML + ↓ + Parse tags for links + ↓ + Check: same domain? not visited? + ↓ + Save page to database (URL, depth, priority) + ↓ + Send "progress" event via channel + ↓ + Spawn goroutines for child URLs + ↓ + Repeat until max depth reached + ↓ + Send "complete" event + ↓ + Close channel, cleanup resources +``` + +### 4. 
Download Request +``` +User clicks download β†’ GET /download/{uuid} + ↓ + Lookup site by UUID + ↓ + Fetch all pages from database + ↓ + Generate XML sitemap + ↓ + Set Content-Disposition header + ↓ + Stream XML to browser +``` + +## πŸ” Security Considerations + +### Implemented +- βœ… Same-domain restriction (no external crawling) +- βœ… Max depth limit (prevents infinite loops) +- βœ… HTTP timeout per request (10 seconds) +- βœ… Duplicate URL prevention +- βœ… SQLite prepared statements (SQL injection safe) +- βœ… CORS middleware included + +### Recommended for Production +- [ ] Rate limiting per IP +- [ ] Authentication/API keys +- [ ] Input validation & sanitization +- [ ] Request size limits +- [ ] robots.txt respect +- [ ] User-Agent identification +- [ ] HTTPS enforcement +- [ ] Firewall rules + +## πŸš€ Performance Optimization + +### Current +- Concurrent goroutines (5 parallel requests default) +- Non-blocking SSE streams +- Efficient channel-based communication +- In-memory visited URL tracking +- Database connection pooling + +### Possible Improvements +- Redis for distributed crawling +- Worker pool pattern +- Content caching +- Incremental sitemap updates +- Compression for large sitemaps +- Database indexing optimization + +## πŸ“Š Database Schema + +### sites table +```sql +- id (PK) - Auto-increment +- uuid (UNIQUE) - Server-generated UUID +- domain - Extracted from URL +- url - Full starting URL +- max_depth - Crawl depth limit +- page_count - Total pages found +- status - processing/completed/failed +- ip_address - Client IP +- user_agent - Full UA string +- browser - Parsed browser name +- browser_version - Version number +- os - Operating system +- device_type - Desktop/Mobile/Tablet +- session_id - Cookie-based session +- cookies - JSON of all cookies +- referrer - HTTP Referer header +- created_at - Timestamp +- completed_at - Completion timestamp +- last_crawled - Last activity +``` + +### pages table +```sql +- id (PK) - Auto-increment +- 
site_id (FK) - References sites(id) +- url - Page URL (UNIQUE) +- depth - Crawl depth level +- last_modified - Discovery time +- priority - Sitemap priority (0.0-1.0) +- change_freq - monthly/weekly/daily/etc +``` + +### sessions table +```sql +- id (PK) - Auto-increment +- session_id (UNIQUE) - Session UUID +- uuid (FK) - References sites(uuid) +- ip_address - Client IP +- created_at - First seen +- last_activity - Last request +``` + +## πŸ§ͺ Testing + +### Manual Testing +```bash +# Terminal 1: Start server +./run.sh + +# Terminal 2: Test API +curl -X POST http://localhost:8080/generate-sitemap-xml \ + -H "Content-Type: application/json" \ + -d '{"url":"https://example.com","max_depth":2}' + +# Terminal 3: Watch SSE stream +curl -N http://localhost:8080/stream/{uuid} +``` + +### Browser Testing +1. Open multiple tabs to http://localhost:8080 +2. Start different crawls simultaneously +3. Verify independent progress tracking +4. Check database for metadata + +### Database Verification +```bash +sqlite3 sitemap.db "SELECT * FROM sites ORDER BY created_at DESC LIMIT 5;" +sqlite3 sitemap.db "SELECT COUNT(*) FROM pages WHERE site_id = 1;" +``` + +## πŸ“¦ Deployment Options + +### Option 1: Binary +```bash +go build -o sitemap-api +./sitemap-api +``` + +### Option 2: Docker +```bash +docker build -t sitemap-api . 
+docker run -p 8080:8080 sitemap-api +``` + +### Option 3: Systemd Service +```ini +[Unit] +Description=Sitemap Generator API +After=network.target + +[Service] +Type=simple +User=www-data +WorkingDirectory=/opt/sitemap-api +ExecStart=/opt/sitemap-api/sitemap-api +Restart=always + +[Install] +WantedBy=multi-user.target +``` + +## πŸ”§ Configuration + +### Environment Variables +```bash +export PORT=8080 # Server port +export DB_PATH=sitemap.db # Database file +``` + +### Code Constants +```go +// crawler/crawler.go +const maxConcurrent = 5 // Parallel requests +const httpTimeout = 10 // Seconds + +// handlers/handler.go +const channelBuffer = 100 // SSE event buffer +``` + +## πŸ“ XML Sitemap Format + +Generated sitemaps follow the standard: +```xml + + + + https://example.com/ + 2024-02-05 + monthly + 1.0 + + + https://example.com/about + 2024-02-05 + monthly + 0.8 + + +``` + +## 🎯 Success Criteria + +All requirements met: +- βœ… Go backend with excellent performance +- βœ… Endpoint: `/generate-sitemap-xml` with UUID response +- βœ… Endpoint: `/stream/{uuid}` for SSE +- βœ… Endpoint: `/download/{uuid}` for XML +- βœ… Multi-user concurrent support +- βœ… Client metadata tracking (IP, browser, cookies, session) +- βœ… SQLite storage +- βœ… Root route `/` serves HTML +- βœ… Real-time progress updates +- βœ… Clean, maintainable code structure + +## πŸ“š Next Steps + +To extend this project: +1. Add user authentication (JWT tokens) +2. Implement rate limiting (go-rate package) +3. Add robots.txt parsing (robotstxt.go package) +4. Support sitemap index for large sites +5. Add scheduling/cron jobs for recurring crawls +6. Implement incremental updates +7. Add webhook notifications +8. Create admin dashboard +9. Export to other formats (JSON, CSV) +10. Add analytics and usage stats + +--- + +**Ready to use! 
Just run `./run.sh` or `make run` to get started.** diff --git a/QUICKSTART.md b/QUICKSTART.md new file mode 100644 index 0000000..14e8075 --- /dev/null +++ b/QUICKSTART.md @@ -0,0 +1,152 @@ +# πŸš€ Quick Start Guide + +Get your sitemap generator running in 3 steps! + +## Step 1: Install Go + +If you don't have Go installed: +- Download from https://golang.org/dl/ +- Install Go 1.21 or later +- Verify: `go version` + +## Step 2: Run the Application + +### Option A: Using the run script (easiest) +```bash +cd sitemap-api +./run.sh +``` + +### Option B: Using Make +```bash +cd sitemap-api +make run +``` + +### Option C: Manual +```bash +cd sitemap-api +go mod download +go build -o sitemap-api . +./sitemap-api +``` + +## Step 3: Use the Application + +1. **Open your browser** β†’ http://localhost:8080 + +2. **Enter a URL** β†’ e.g., `https://example.com` + +3. **Set crawl depth** β†’ 1-5 (default: 3) + +4. **Click "Generate Sitemap"** β†’ Watch real-time progress! + +5. **Download XML** β†’ Click the download button when complete + +## Testing Multiple Users + +Open multiple browser tabs to http://localhost:8080 and start different crawls simultaneously. Each will have its own UUID and progress stream! 
+ +## API Usage Examples + +### Start a crawl +```bash +curl -X POST http://localhost:8080/generate-sitemap-xml \ + -H "Content-Type: application/json" \ + -d '{"url": "https://example.com", "max_depth": 3}' +``` + +Response: +```json +{ + "uuid": "550e8400-e29b-41d4-a716-446655440000", + "site_id": 123, + "status": "processing", + "stream_url": "/stream/550e8400-e29b-41d4-a716-446655440000", + "message": "Sitemap generation started" +} +``` + +### Monitor progress (SSE) +```bash +curl http://localhost:8080/stream/550e8400-e29b-41d4-a716-446655440000 +``` + +### Download sitemap +```bash +curl http://localhost:8080/download/550e8400-e29b-41d4-a716-446655440000 -o sitemap.xml +``` + +### List all sitemaps +```bash +curl http://localhost:8080/sites +``` + +### Delete a sitemap +```bash +curl -X DELETE http://localhost:8080/sites/123 +``` + +## Troubleshooting + +### Port already in use +```bash +PORT=3000 ./sitemap-api +``` + +### Build errors +```bash +go mod tidy +go clean -cache +go build -o sitemap-api . 
+``` + +### Database locked +```bash +rm sitemap.db +./sitemap-api +``` + +### CGO errors +Make sure you have gcc installed: +- **Ubuntu/Debian**: `sudo apt-get install build-essential` +- **macOS**: `xcode-select --install` +- **Windows**: Install MinGW or TDM-GCC + +## Next Steps + +- Read the full [README.md](README.md) for details +- Customize the crawler in `crawler/crawler.go` +- Add authentication to handlers +- Deploy to production (see README for nginx config) +- Add more metadata tracking + +## Project Structure + +``` +sitemap-api/ +β”œβ”€β”€ main.go # Server entry point +β”œβ”€β”€ handlers/ # HTTP handlers & SSE +β”œβ”€β”€ crawler/ # Web crawler logic +β”œβ”€β”€ database/ # SQLite operations +β”œβ”€β”€ models/ # Data structures +β”œβ”€β”€ static/ # Frontend (served at /) +β”œβ”€β”€ README.md # Full documentation +β”œβ”€β”€ run.sh # Quick start script +β”œβ”€β”€ Makefile # Build commands +└── Dockerfile # Container setup +``` + +## Support + +Having issues? Check: +1. Go version >= 1.21 +2. Port 8080 is available +3. SQLite3 is working +4. All dependencies installed + +Still stuck? Open an issue on GitHub! + +--- + +**Built with ❀️ using Go + Goroutines + Server-Sent Events** diff --git a/README.md b/README.md new file mode 100644 index 0000000..65bcbf9 --- /dev/null +++ b/README.md @@ -0,0 +1,213 @@ +# XML Sitemap Generator API + +A high-performance Go-based API for generating XML sitemaps with real-time progress tracking via Server-Sent Events (SSE). 
+ +## Features + +- βœ… **Concurrent Web Crawling** - Fast sitemap generation using goroutines +- βœ… **Real-time Progress** - SSE streaming for live updates +- βœ… **Multi-user Support** - Handle multiple simultaneous crawls +- βœ… **Client Metadata Tracking** - IP, browser, OS, session data stored in SQLite +- βœ… **Clean REST API** - Simple endpoints for generate, stream, and download +- βœ… **Professional UI** - Beautiful web interface included + +## Architecture + +``` +sitemap-api/ +β”œβ”€β”€ main.go # Entry point & HTTP server +β”œβ”€β”€ handlers/ +β”‚ └── handler.go # HTTP handlers & SSE streaming +β”œβ”€β”€ crawler/ +β”‚ └── crawler.go # Concurrent web crawler +β”œβ”€β”€ database/ +β”‚ └── db.go # SQLite operations +β”œβ”€β”€ models/ +β”‚ └── site.go # Data structures +└── static/ + └── index.html # Frontend UI +``` + +## API Endpoints + +### `POST /generate-sitemap-xml` +Start sitemap generation (backend generates UUID) + +**Request:** +```json +{ + "url": "https://example.com", + "max_depth": 3 +} +``` + +**Response:** +```json +{ + "uuid": "550e8400-e29b-41d4-a716-446655440000", + "site_id": 123, + "status": "processing", + "stream_url": "/stream/550e8400-...", + "message": "Sitemap generation started" +} +``` + +### `GET /stream/{uuid}` +Server-Sent Events stream for real-time progress + +**Events:** `connected`, `started`, `progress`, `complete`, `error` + +### `GET /download/{uuid}` +Download generated sitemap XML + +### `GET /sites` +List all generated sitemaps + +### `GET /sites/{id}` +Get specific site details + +### `DELETE /sites/{id}` +Delete a sitemap + +### `GET /health` +Health check endpoint + +## Installation + +### Prerequisites +- Go 1.21+ +- SQLite3 + +### Setup + +```bash +# Clone/navigate to directory +cd sitemap-api + +# Install dependencies +go mod download + +# Build +go build -o sitemap-api + +# Run +./sitemap-api +``` + +Server starts on **http://localhost:8080** + +### Or run directly: +```bash +go run main.go +``` + +## Usage + 
+1. Open http://localhost:8080 in your browser +2. Enter a website URL +3. Set crawl depth (1-5) +4. Click "Generate Sitemap" +5. Watch real-time progress +6. Download XML when complete + +## Database Schema + +SQLite database (`sitemap.db`) stores: +- **sites** - Crawl sessions with client metadata +- **pages** - Discovered URLs with priority/frequency +- **sessions** - User session tracking + +## Environment Variables + +- `PORT` - Server port (default: 8080) + +Example: +```bash +PORT=3000 ./sitemap-api +``` + +## How It Works + +1. **Frontend** sends POST to `/generate-sitemap-xml` +2. **Backend** generates UUID, saves metadata, returns UUID +3. **Frontend** connects to `/stream/{uuid}` for SSE updates +4. **Crawler** runs in goroutine, sends events via channel +5. **Handler** streams events to frontend in real-time +6. **On completion**, sitemap available at `/download/{uuid}` + +## Multi-User Concurrency + +The `StreamManager` handles concurrent users: +- Each UUID maps to a Go channel +- Concurrent map with mutex for thread safety +- Automatic cleanup after crawl completion +- Supports unlimited simultaneous crawls + +## Client Metadata Captured + +- IP Address (with X-Forwarded-For support) +- User-Agent +- Browser name & version +- Operating System +- Device Type (Desktop/Mobile/Tablet) +- Session ID (cookie-based) +- All cookies (JSON) +- Referrer + +## Performance + +- Concurrent crawling with goroutines +- Configurable concurrency limit (default: 5 parallel requests) +- Depth-limited to prevent infinite crawls +- Same-domain restriction +- Duplicate URL prevention +- 10-second HTTP timeout per request + +## Customization + +### Adjust Concurrency +Edit `crawler/crawler.go`: +```go +semaphore := make(chan struct{}, 10) // Increase to 10 concurrent +``` + +### Change Priority Calculation +Modify `calculatePriority()` in `crawler/crawler.go` + +### Add Custom Metadata +Extend `models.Site` struct and database schema + +## Production Deployment + +### 
Recommendations: +1. Use reverse proxy (nginx/caddy) +2. Enable HTTPS +3. Add rate limiting +4. Configure CORS properly +5. Use PostgreSQL for production (replace SQLite) +6. Add authentication +7. Implement cleanup jobs for old sitemaps + +### Example nginx config: +```nginx +location / { + proxy_pass http://localhost:8080; + proxy_http_version 1.1; + proxy_set_header Upgrade $http_upgrade; + proxy_set_header Connection 'upgrade'; + proxy_set_header Host $host; + proxy_cache_bypass $http_upgrade; + + # SSE support + proxy_buffering off; + proxy_cache off; +} +``` + +## License + +MIT + +## Support + +For issues or questions, please open a GitHub issue. diff --git a/crawler.go b/crawler.go new file mode 100644 index 0000000..88125cc --- /dev/null +++ b/crawler.go @@ -0,0 +1,287 @@ +package crawler + +import ( + "fmt" + "net/http" + "net/url" + "sitemap-api/database" + "sitemap-api/models" + "strings" + "sync" + "time" + + "golang.org/x/net/html" +) + +type Crawler struct { + db *database.DB + maxDepth int + visited map[string]bool + mu sync.Mutex + baseURL *url.URL + client *http.Client + eventChan chan models.Event + uuid string + siteID int + currentDepth int + totalPages int +} + +func NewCrawler(db *database.DB) *Crawler { + return &Crawler{ + db: db, + client: &http.Client{ + Timeout: 10 * time.Second, + CheckRedirect: func(req *http.Request, via []*http.Request) error { + if len(via) >= 10 { + return fmt.Errorf("too many redirects") + } + return nil + }, + }, + } +} + +func (c *Crawler) Crawl(uuid string, startURL string, maxDepth int, eventChan chan models.Event) { + c.uuid = uuid + c.maxDepth = maxDepth + c.eventChan = eventChan + c.visited = make(map[string]bool) + c.totalPages = 0 + + // Parse base URL + parsedURL, err := url.Parse(startURL) + if err != nil { + c.sendEvent("error", models.ErrorData{ + UUID: uuid, + Error: fmt.Sprintf("Invalid URL: %v", err), + }) + return + } + c.baseURL = parsedURL + + // Get site from database + site, err := 
c.db.GetSiteByUUID(uuid) + if err != nil { + c.sendEvent("error", models.ErrorData{ + UUID: uuid, + Error: fmt.Sprintf("Failed to get site: %v", err), + }) + return + } + c.siteID = site.ID + + // Send started event + c.sendEvent("started", map[string]interface{}{ + "uuid": uuid, + "url": startURL, + "max_depth": maxDepth, + }) + + // Start crawling from root + c.crawlURL(startURL, 0) + + // Mark as completed + err = c.db.UpdateSiteStatus(uuid, "completed", c.totalPages) + if err != nil { + c.sendEvent("error", models.ErrorData{ + UUID: uuid, + Error: fmt.Sprintf("Failed to update status: %v", err), + }) + return + } + + // Send completion event + c.sendEvent("complete", models.CompleteData{ + UUID: uuid, + PagesFound: c.totalPages, + SiteID: c.siteID, + DownloadURL: fmt.Sprintf("/download/%s", uuid), + }) +} + +func (c *Crawler) crawlURL(urlStr string, depth int) { + // Check depth limit + if depth > c.maxDepth { + return + } + + // Normalize URL + normalizedURL := c.normalizeURL(urlStr) + if normalizedURL == "" { + return + } + + // Check if already visited + c.mu.Lock() + if c.visited[normalizedURL] { + c.mu.Unlock() + return + } + c.visited[normalizedURL] = true + c.totalPages++ + currentTotal := c.totalPages + c.currentDepth = depth + c.mu.Unlock() + + // Send progress event + c.sendEvent("progress", models.ProgressData{ + UUID: c.uuid, + PagesFound: currentTotal, + Depth: depth, + CurrentURL: normalizedURL, + }) + + // Save page to database + priority := c.calculatePriority(depth) + page := &models.Page{ + SiteID: c.siteID, + URL: normalizedURL, + Depth: depth, + LastModified: time.Now(), + Priority: priority, + ChangeFreq: "monthly", + } + + if err := c.db.AddPage(page); err != nil { + // Log error but continue crawling + fmt.Printf("Failed to save page %s: %v\n", normalizedURL, err) + } + + // Fetch the page + resp, err := c.client.Get(normalizedURL) + if err != nil { + return + } + defer resp.Body.Close() + + // Only process HTML pages + contentType := 
resp.Header.Get("Content-Type") + if !strings.Contains(contentType, "text/html") { + return + } + + // Parse HTML and extract links + links := c.extractLinks(resp) + + // Crawl found links concurrently (with limited concurrency) + var wg sync.WaitGroup + semaphore := make(chan struct{}, 5) // Limit to 5 concurrent requests + + for _, link := range links { + if depth+1 <= c.maxDepth { + wg.Add(1) + go func(l string) { + defer wg.Done() + semaphore <- struct{}{} // Acquire + c.crawlURL(l, depth+1) + <-semaphore // Release + }(link) + } + } + + wg.Wait() +} + +func (c *Crawler) extractLinks(resp *http.Response) []string { + var links []string + tokenizer := html.NewTokenizer(resp.Body) + + for { + tokenType := tokenizer.Next() + if tokenType == html.ErrorToken { + break + } + + if tokenType == html.StartTagToken { + token := tokenizer.Token() + if token.Data == "a" { + for _, attr := range token.Attr { + if attr.Key == "href" { + link := c.resolveURL(attr.Val) + if link != "" && c.isSameDomain(link) { + links = append(links, link) + } + } + } + } + } + } + + return links +} + +func (c *Crawler) resolveURL(href string) string { + parsedURL, err := url.Parse(href) + if err != nil { + return "" + } + + // Resolve relative URLs + resolvedURL := c.baseURL.ResolveReference(parsedURL) + return resolvedURL.String() +} + +func (c *Crawler) normalizeURL(urlStr string) string { + parsedURL, err := url.Parse(urlStr) + if err != nil { + return "" + } + + // Remove fragment + parsedURL.Fragment = "" + + // Remove trailing slash for consistency + parsedURL.Path = strings.TrimSuffix(parsedURL.Path, "/") + if parsedURL.Path == "" { + parsedURL.Path = "/" + } + + return parsedURL.String() +} + +func (c *Crawler) isSameDomain(urlStr string) bool { + parsedURL, err := url.Parse(urlStr) + if err != nil { + return false + } + + // Check if same host + if parsedURL.Host != c.baseURL.Host { + return false + } + + // Skip common non-HTML files + path := strings.ToLower(parsedURL.Path) + 
skipExtensions := []string{".pdf", ".jpg", ".jpeg", ".png", ".gif", ".css", ".js", ".xml", ".zip", ".tar", ".gz"} + for _, ext := range skipExtensions { + if strings.HasSuffix(path, ext) { + return false + } + } + + return true +} + +func (c *Crawler) calculatePriority(depth int) float64 { + // Homepage gets highest priority + if depth == 0 { + return 1.0 + } + // Decrease priority with depth + priority := 1.0 - (float64(depth) * 0.2) + if priority < 0.3 { + priority = 0.3 + } + return priority +} + +func (c *Crawler) sendEvent(eventType string, data interface{}) { + if c.eventChan != nil { + select { + case c.eventChan <- models.Event{Type: eventType, Data: data}: + default: + // Channel full or closed, skip event + } + } +} diff --git a/db.go b/db.go new file mode 100644 index 0000000..3aa70e1 --- /dev/null +++ b/db.go @@ -0,0 +1,253 @@ +package database + +import ( + "database/sql" + "fmt" + "sitemap-api/models" + "time" + + _ "github.com/mattn/go-sqlite3" +) + +type DB struct { + conn *sql.DB +} + +func NewDB(dbPath string) (*DB, error) { + conn, err := sql.Open("sqlite3", dbPath) + if err != nil { + return nil, err + } + + db := &DB{conn: conn} + if err := db.createTables(); err != nil { + return nil, err + } + + return db, nil +} + +func (db *DB) Close() error { + return db.conn.Close() +} + +func (db *DB) createTables() error { + schema := ` + CREATE TABLE IF NOT EXISTS sites ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + uuid TEXT UNIQUE NOT NULL, + domain TEXT NOT NULL, + url TEXT NOT NULL, + max_depth INTEGER DEFAULT 3, + page_count INTEGER DEFAULT 0, + status TEXT DEFAULT 'processing', + + ip_address TEXT, + user_agent TEXT, + browser TEXT, + browser_version TEXT, + os TEXT, + device_type TEXT, + + session_id TEXT, + cookies TEXT, + referrer TEXT, + + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + completed_at DATETIME, + last_crawled DATETIME + ); + + CREATE TABLE IF NOT EXISTS pages ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + site_id INTEGER NOT NULL, + 
url TEXT NOT NULL UNIQUE, + depth INTEGER DEFAULT 0, + last_modified DATETIME DEFAULT CURRENT_TIMESTAMP, + priority REAL DEFAULT 0.5, + change_freq TEXT DEFAULT 'monthly', + FOREIGN KEY (site_id) REFERENCES sites(id) ON DELETE CASCADE + ); + + CREATE TABLE IF NOT EXISTS sessions ( + id INTEGER PRIMARY KEY AUTOINCREMENT, + session_id TEXT UNIQUE NOT NULL, + uuid TEXT, + ip_address TEXT, + created_at DATETIME DEFAULT CURRENT_TIMESTAMP, + last_activity DATETIME DEFAULT CURRENT_TIMESTAMP, + FOREIGN KEY (uuid) REFERENCES sites(uuid) + ); + + CREATE INDEX IF NOT EXISTS idx_uuid ON sites(uuid); + CREATE INDEX IF NOT EXISTS idx_site_pages ON pages(site_id); + CREATE INDEX IF NOT EXISTS idx_session_id ON sessions(session_id); + CREATE INDEX IF NOT EXISTS idx_status ON sites(status); + ` + + _, err := db.conn.Exec(schema) + return err +} + +func (db *DB) CreateSite(site *models.Site) (int, error) { + query := ` + INSERT INTO sites (uuid, domain, url, max_depth, status, ip_address, + user_agent, browser, browser_version, os, device_type, session_id, + cookies, referrer, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ` + + result, err := db.conn.Exec(query, + site.UUID, site.Domain, site.URL, site.MaxDepth, site.Status, + site.IPAddress, site.UserAgent, site.Browser, site.BrowserVersion, + site.OS, site.DeviceType, site.SessionID, site.Cookies, site.Referrer, + time.Now(), + ) + + if err != nil { + return 0, err + } + + id, err := result.LastInsertId() + return int(id), err +} + +func (db *DB) GetSiteByUUID(uuid string) (*models.Site, error) { + query := ` + SELECT id, uuid, domain, url, max_depth, page_count, status, + ip_address, user_agent, browser, browser_version, os, device_type, + session_id, cookies, referrer, created_at, completed_at, last_crawled + FROM sites WHERE uuid = ? 
+ ` + + site := &models.Site{} + err := db.conn.QueryRow(query, uuid).Scan( + &site.ID, &site.UUID, &site.Domain, &site.URL, &site.MaxDepth, + &site.PageCount, &site.Status, &site.IPAddress, &site.UserAgent, + &site.Browser, &site.BrowserVersion, &site.OS, &site.DeviceType, + &site.SessionID, &site.Cookies, &site.Referrer, &site.CreatedAt, + &site.CompletedAt, &site.LastCrawled, + ) + + if err == sql.ErrNoRows { + return nil, fmt.Errorf("site not found") + } + + return site, err +} + +func (db *DB) GetSiteByID(id int) (*models.Site, error) { + query := ` + SELECT id, uuid, domain, url, max_depth, page_count, status, + ip_address, user_agent, browser, browser_version, os, device_type, + session_id, cookies, referrer, created_at, completed_at, last_crawled + FROM sites WHERE id = ? + ` + + site := &models.Site{} + err := db.conn.QueryRow(query, id).Scan( + &site.ID, &site.UUID, &site.Domain, &site.URL, &site.MaxDepth, + &site.PageCount, &site.Status, &site.IPAddress, &site.UserAgent, + &site.Browser, &site.BrowserVersion, &site.OS, &site.DeviceType, + &site.SessionID, &site.Cookies, &site.Referrer, &site.CreatedAt, + &site.CompletedAt, &site.LastCrawled, + ) + + if err == sql.ErrNoRows { + return nil, fmt.Errorf("site not found") + } + + return site, err +} + +func (db *DB) GetAllSites() ([]*models.Site, error) { + query := ` + SELECT id, uuid, domain, url, max_depth, page_count, status, + ip_address, user_agent, browser, browser_version, os, device_type, + session_id, cookies, referrer, created_at, completed_at, last_crawled + FROM sites ORDER BY created_at DESC + ` + + rows, err := db.conn.Query(query) + if err != nil { + return nil, err + } + defer rows.Close() + + sites := []*models.Site{} + for rows.Next() { + site := &models.Site{} + err := rows.Scan( + &site.ID, &site.UUID, &site.Domain, &site.URL, &site.MaxDepth, + &site.PageCount, &site.Status, &site.IPAddress, &site.UserAgent, + &site.Browser, &site.BrowserVersion, &site.OS, &site.DeviceType, + 
&site.SessionID, &site.Cookies, &site.Referrer, &site.CreatedAt, + &site.CompletedAt, &site.LastCrawled, + ) + if err != nil { + return nil, err + } + sites = append(sites, site) + } + + return sites, nil +} + +func (db *DB) UpdateSiteStatus(uuid string, status string, pageCount int) error { + query := ` + UPDATE sites + SET status = ?, page_count = ?, completed_at = ?, last_crawled = ? + WHERE uuid = ? + ` + + now := time.Now() + _, err := db.conn.Exec(query, status, pageCount, now, now, uuid) + return err +} + +func (db *DB) DeleteSite(id int) error { + // Pages will be deleted automatically due to CASCADE + _, err := db.conn.Exec("DELETE FROM sites WHERE id = ?", id) + return err +} + +func (db *DB) AddPage(page *models.Page) error { + query := ` + INSERT OR IGNORE INTO pages (site_id, url, depth, last_modified, priority, change_freq) + VALUES (?, ?, ?, ?, ?, ?) + ` + + _, err := db.conn.Exec(query, + page.SiteID, page.URL, page.Depth, page.LastModified, + page.Priority, page.ChangeFreq, + ) + return err +} + +func (db *DB) GetPagesBySiteID(siteID int) ([]*models.Page, error) { + query := ` + SELECT id, site_id, url, depth, last_modified, priority, change_freq + FROM pages WHERE site_id = ? 
ORDER BY depth, url + ` + + rows, err := db.conn.Query(query, siteID) + if err != nil { + return nil, err + } + defer rows.Close() + + pages := []*models.Page{} + for rows.Next() { + page := &models.Page{} + err := rows.Scan( + &page.ID, &page.SiteID, &page.URL, &page.Depth, + &page.LastModified, &page.Priority, &page.ChangeFreq, + ) + if err != nil { + return nil, err + } + pages = append(pages, page) + } + + return pages, nil +} diff --git a/go.mod b/go.mod new file mode 100644 index 0000000..3b50574 --- /dev/null +++ b/go.mod @@ -0,0 +1,11 @@ +module sitemap-api + +go 1.21 + +require ( + github.com/go-chi/chi/v5 v5.0.11 + github.com/go-chi/cors v1.2.1 + github.com/google/uuid v1.5.0 + github.com/mattn/go-sqlite3 v1.14.19 + golang.org/x/net v0.20.0 +) diff --git a/handler.go b/handler.go new file mode 100644 index 0000000..ceebb1f --- /dev/null +++ b/handler.go @@ -0,0 +1,465 @@ +package handlers + +import ( + "encoding/json" + "encoding/xml" + "fmt" + "net/http" + "net/url" + "sitemap-api/crawler" + "sitemap-api/database" + "sitemap-api/models" + "strconv" + "strings" + "sync" + "time" + + "github.com/go-chi/chi/v5" + "github.com/google/uuid" +) + +type Handler struct { + db *database.DB + crawler *crawler.Crawler + streamManager *StreamManager +} + +func NewHandler(db *database.DB, streamManager *StreamManager) *Handler { + return &Handler{ + db: db, + crawler: crawler.NewCrawler(db), + streamManager: streamManager, + } +} + +// StreamManager handles multiple concurrent SSE connections +type StreamManager struct { + mu sync.RWMutex + streams map[string]chan models.Event +} + +func NewStreamManager() *StreamManager { + return &StreamManager{ + streams: make(map[string]chan models.Event), + } +} + +func (sm *StreamManager) CreateStream(uuid string) chan models.Event { + sm.mu.Lock() + defer sm.mu.Unlock() + + ch := make(chan models.Event, 100) + sm.streams[uuid] = ch + return ch +} + +func (sm *StreamManager) GetStream(uuid string) (chan models.Event, bool) { + 
sm.mu.RLock() + defer sm.mu.RUnlock() + + ch, exists := sm.streams[uuid] + return ch, exists +} + +func (sm *StreamManager) CloseStream(uuid string) { + sm.mu.Lock() + defer sm.mu.Unlock() + + if ch, exists := sm.streams[uuid]; exists { + close(ch) + delete(sm.streams, uuid) + } +} + +// GenerateSitemapXML handles POST /generate-sitemap-xml +func (h *Handler) GenerateSitemapXML(w http.ResponseWriter, r *http.Request) { + var req struct { + URL string `json:"url"` + MaxDepth int `json:"max_depth"` + } + + if err := json.NewDecoder(r.Body).Decode(&req); err != nil { + http.Error(w, "Invalid request body", http.StatusBadRequest) + return + } + + // Validate URL + if req.URL == "" { + http.Error(w, "URL is required", http.StatusBadRequest) + return + } + + parsedURL, err := url.Parse(req.URL) + if err != nil || parsedURL.Scheme == "" || parsedURL.Host == "" { + http.Error(w, "Invalid URL format", http.StatusBadRequest) + return + } + + // Set default max depth + if req.MaxDepth <= 0 || req.MaxDepth > 5 { + req.MaxDepth = 3 + } + + // Generate UUID server-side + generatedUUID := uuid.New().String() + + // Extract client metadata + ip := getClientIP(r) + userAgent := r.Header.Get("User-Agent") + browser, browserVersion := parseBrowser(userAgent) + os := parseOS(userAgent) + deviceType := parseDeviceType(userAgent) + sessionID := getOrCreateSession(r) + cookies := extractCookies(r) + referrer := r.Header.Get("Referer") + + // Extract domain from URL + domain := parsedURL.Host + + // Create site record + site := &models.Site{ + UUID: generatedUUID, + Domain: domain, + URL: req.URL, + MaxDepth: req.MaxDepth, + Status: "processing", + IPAddress: ip, + UserAgent: userAgent, + Browser: browser, + BrowserVersion: browserVersion, + OS: os, + DeviceType: deviceType, + SessionID: sessionID, + Cookies: cookies, + Referrer: referrer, + } + + siteID, err := h.db.CreateSite(site) + if err != nil { + http.Error(w, fmt.Sprintf("Failed to create site: %v", err), 
http.StatusInternalServerError) + return + } + + // Create SSE stream for this UUID + eventChan := h.streamManager.CreateStream(generatedUUID) + + // Start crawling in background (non-blocking) + go func() { + h.crawler.Crawl(generatedUUID, req.URL, req.MaxDepth, eventChan) + // Close stream after crawl completes + time.Sleep(2 * time.Second) // Give time for final events to be sent + h.streamManager.CloseStream(generatedUUID) + }() + + // Return immediately with UUID + response := map[string]interface{}{ + "uuid": generatedUUID, + "site_id": siteID, + "status": "processing", + "stream_url": "/stream/" + generatedUUID, + "message": "Sitemap generation started", + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(response) +} + +// StreamSSE handles GET /stream/{uuid} +func (h *Handler) StreamSSE(w http.ResponseWriter, r *http.Request) { + uuid := chi.URLParam(r, "uuid") + + // Get event channel for this UUID + eventChan, exists := h.streamManager.GetStream(uuid) + if !exists { + http.Error(w, "Stream not found", http.StatusNotFound) + return + } + + // Set SSE headers + w.Header().Set("Content-Type", "text/event-stream") + w.Header().Set("Cache-Control", "no-cache") + w.Header().Set("Connection", "keep-alive") + w.Header().Set("Access-Control-Allow-Origin", "*") + + flusher, ok := w.(http.Flusher) + if !ok { + http.Error(w, "Streaming unsupported", http.StatusInternalServerError) + return + } + + // Send connected event + connectedData := map[string]string{ + "uuid": uuid, + "message": "Connected to stream", + } + connectedJSON, _ := json.Marshal(connectedData) + fmt.Fprintf(w, "event: connected\ndata: %s\n\n", connectedJSON) + flusher.Flush() + + // Stream events + for event := range eventChan { + data, err := json.Marshal(event.Data) + if err != nil { + continue + } + fmt.Fprintf(w, "event: %s\ndata: %s\n\n", event.Type, data) + flusher.Flush() + } +} + +// DownloadSitemap handles GET /download/{uuid} +func (h *Handler) 
DownloadSitemap(w http.ResponseWriter, r *http.Request) { + uuidParam := chi.URLParam(r, "uuid") + + // Get site by UUID + site, err := h.db.GetSiteByUUID(uuidParam) + if err != nil { + http.Error(w, "Sitemap not found", http.StatusNotFound) + return + } + + // Get all pages for this site + pages, err := h.db.GetPagesBySiteID(site.ID) + if err != nil { + http.Error(w, "Failed to retrieve pages", http.StatusInternalServerError) + return + } + + // Generate XML sitemap + sitemap := generateXMLSitemap(pages) + + // Set headers + filename := fmt.Sprintf("sitemap-%s.xml", strings.ReplaceAll(site.Domain, ".", "-")) + w.Header().Set("Content-Type", "application/xml; charset=utf-8") + w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=\"%s\"", filename)) + w.Header().Set("X-Generated-At", time.Now().Format(time.RFC3339)) + + // Write XML + w.Write([]byte(xml.Header)) + w.Write([]byte(sitemap)) +} + +// GetSites handles GET /sites +func (h *Handler) GetSites(w http.ResponseWriter, r *http.Request) { + sites, err := h.db.GetAllSites() + if err != nil { + http.Error(w, "Failed to retrieve sites", http.StatusInternalServerError) + return + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(sites) +} + +// GetSite handles GET /sites/{id} +func (h *Handler) GetSite(w http.ResponseWriter, r *http.Request) { + idParam := chi.URLParam(r, "id") + id, err := strconv.Atoi(idParam) + if err != nil { + http.Error(w, "Invalid site ID", http.StatusBadRequest) + return + } + + site, err := h.db.GetSiteByID(id) + if err != nil { + http.Error(w, "Site not found", http.StatusNotFound) + return + } + + w.Header().Set("Content-Type", "application/json") + json.NewEncoder(w).Encode(site) +} + +// DeleteSite handles DELETE /sites/{id} +func (h *Handler) DeleteSite(w http.ResponseWriter, r *http.Request) { + idParam := chi.URLParam(r, "id") + id, err := strconv.Atoi(idParam) + if err != nil { + http.Error(w, "Invalid site ID", 
http.StatusBadRequest)
		return
	}

	if err := h.db.DeleteSite(id); err != nil {
		http.Error(w, "Failed to delete site", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]interface{}{
		"success": true,
		"message": "Site deleted successfully",
	})
}

// Health handles GET /health and reports liveness plus the server time.
func (h *Handler) Health(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]string{
		"status": "healthy",
		"time":   time.Now().Format(time.RFC3339),
	})
}

// Helper functions

// getClientIP returns the best-guess client address: the first entry of
// X-Forwarded-For if present, then X-Real-IP, then RemoteAddr with the
// port stripped. Note these headers are client-controlled and spoofable.
func getClientIP(r *http.Request) string {
	// X-Forwarded-For may carry a comma-separated proxy chain; the first
	// entry is the originating client.
	forwarded := r.Header.Get("X-Forwarded-For")
	if forwarded != "" {
		ips := strings.Split(forwarded, ",")
		return strings.TrimSpace(ips[0])
	}

	realIP := r.Header.Get("X-Real-IP")
	if realIP != "" {
		return realIP
	}

	// RemoteAddr is "host:port". Trim from the LAST colon so an IPv6
	// address such as "[::1]:54321" yields "::1" instead of the "["
	// that splitting on the first ':' produced.
	ip := r.RemoteAddr
	if i := strings.LastIndex(ip, ":"); i != -1 {
		ip = ip[:i]
	}
	return strings.Trim(ip, "[]")
}

// parseBrowser identifies the browser family and version from a User-Agent.
//
// Detection order matters: Edge and Opera UAs also contain "chrome", and
// Chrome UAs contain "safari", so the more specific tokens must be tried
// first. The previous implementation ranged over a map, whose iteration
// order is random in Go, so the reported browser was nondeterministic.
func parseBrowser(userAgent string) (string, string) {
	ua := strings.ToLower(userAgent)

	browsers := []struct {
		key  string
		name string
	}{
		{"edg", "Edge"},
		{"opr", "Opera"}, // modern Opera advertises "OPR/"
		{"opera", "Opera"},
		{"firefox", "Firefox"},
		{"chrome", "Chrome"},
		{"safari", "Safari"},
	}

	for _, b := range browsers {
		if strings.Contains(ua, b.key) {
			return b.name, extractVersion(ua, b.key)
		}
	}

	return "Unknown", ""
}

// extractVersion pulls the numeric version that follows the browser token
// (e.g. "chrome/120.0" -> "120.0"); "" when no version is present.
func extractVersion(ua, browser string) string {
	idx := strings.Index(ua, browser)
	if idx == -1 {
		return ""
	}

	versionStart := idx + len(browser)
	if versionStart >= len(ua) {
		return ""
	}

	// Skip forward to version number
	for versionStart < len(ua) && (ua[versionStart] == '/' || ua[versionStart] == ' ') {
		versionStart++
	}

	versionEnd := versionStart
	for versionEnd < len(ua) &&
(ua[versionEnd] >= '0' && ua[versionEnd] <= '9' || ua[versionEnd] == '.') {
		versionEnd++
	}

	return ua[versionStart:versionEnd]
}

// parseOS maps User-Agent keywords to a friendly OS name.
// NOTE(review): Windows 11 also reports "Windows NT 10.0", so it is not
// distinguishable from Windows 10 by UA alone; the "windows nt 11"
// entry only matches if such a token ever appears.
func parseOS(userAgent string) string {
	ua := strings.ToLower(userAgent)

	oses := []struct {
		keyword string
		name    string
	}{
		{"windows nt 10", "Windows 10"},
		{"windows nt 11", "Windows 11"},
		{"mac os x", "macOS"},
		{"android", "Android"},
		{"iphone", "iOS"},
		{"ipad", "iOS"},
		{"linux", "Linux"},
	}

	for _, os := range oses {
		if strings.Contains(ua, os.keyword) {
			return os.name
		}
	}

	return "Unknown"
}

// parseDeviceType classifies the client as Mobile, Tablet or Desktop.
// Because "android" is tested in the mobile branch first, Android tablets
// whose UA lacks "mobile" are still reported as Mobile.
func parseDeviceType(userAgent string) string {
	ua := strings.ToLower(userAgent)

	if strings.Contains(ua, "mobile") || strings.Contains(ua, "android") || strings.Contains(ua, "iphone") {
		return "Mobile"
	}

	if strings.Contains(ua, "tablet") || strings.Contains(ua, "ipad") {
		return "Tablet"
	}

	return "Desktop"
}

// getOrCreateSession returns the request's session_id cookie value when
// present, otherwise a fresh UUID. (The new id is not written back as a
// Set-Cookie here; each cookie-less request gets a new id.)
func getOrCreateSession(r *http.Request) string {
	cookie, err := r.Cookie("session_id")
	if err == nil && cookie.Value != "" {
		return cookie.Value
	}

	return uuid.New().String()
}

// extractCookies serializes all request cookies into a JSON object string,
// or "" when the request carries none.
func extractCookies(r *http.Request) string {
	cookies := r.Cookies()
	if len(cookies) == 0 {
		return ""
	}

	cookieData := make(map[string]string)
	for _, cookie := range cookies {
		cookieData[cookie.Name] = cookie.Value
	}

	data, _ := json.Marshal(cookieData)
	return string(data)
}

// generateXMLSitemap renders pages as a sitemaps.org <urlset> document,
// without the XML declaration (DownloadSitemap writes xml.Header first).
// The element tags had been stripped from this copy of the file — the
// WriteString calls emitted only whitespace — so they are restored here
// per the sitemap protocol.
func generateXMLSitemap(pages []*models.Page) string {
	var sb strings.Builder

	sb.WriteString("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n")

	for _, page := range pages {
		sb.WriteString("  <url>\n")
		sb.WriteString(fmt.Sprintf("    <loc>%s</loc>\n", xmlEscape(page.URL)))
		sb.WriteString(fmt.Sprintf("    <lastmod>%s</lastmod>\n", page.LastModified.Format("2006-01-02")))
		sb.WriteString(fmt.Sprintf("    <changefreq>%s</changefreq>\n", page.ChangeFreq))
		sb.WriteString(fmt.Sprintf("    <priority>%.1f</priority>\n", page.Priority))
		sb.WriteString("  </url>\n")
	}

	sb.WriteString("</urlset>")

	return sb.String()
}

// xmlEscape replaces the five XML special characters with their entity
// references so arbitrary URLs are safe inside <loc>. The entities had
// been collapsed to the literal characters in this copy (making every
// ReplaceAll a no-op); restored here.
func xmlEscape(s string) string {
	// '&' must be escaped first so it doesn't re-escape the other entities.
	s = strings.ReplaceAll(s, "&", "&amp;")
	s = strings.ReplaceAll(s, "<", "&lt;")
	s = strings.ReplaceAll(s, ">", "&gt;")
	s = strings.ReplaceAll(s, "\"", "&quot;")
	s = strings.ReplaceAll(s, "'", "&apos;")
	return s
}
diff --git a/index.html b/index.html
new file mode 100644
index 0000000..c8d4276
--- /dev/null
+++ b/index.html
@@ -0,0 +1,726 @@
Sitemap Generator
+
+

πŸ—ΊοΈ XML Sitemap Generator

+

Generate sitemaps for your websites with real-time progress tracking

+
+ +
+ +
+
+ + +
+ +
+ + +
+ + +
+ + +
+
πŸ”΄ Disconnected
+ +
+
+
+ +
+ Initializing... +
+ +
+
+
0
+
Pages Found
+
+
+
0
+
Current Depth
+
+
+
0s
+
Crawl Time
+
+
+ +
+ Current: - +
+ + + +
+ + + + + +
+

Previously Generated Sitemaps

+
+
+
+
+ + + + diff --git a/main.go b/main.go new file mode 100644 index 0000000..d5d4217 --- /dev/null +++ b/main.go @@ -0,0 +1,72 @@ +package main + +import ( + "log" + "net/http" + "os" + + "sitemap-api/database" + "sitemap-api/handlers" + + "github.com/go-chi/chi/v5" + "github.com/go-chi/chi/v5/middleware" + "github.com/go-chi/cors" +) + +func main() { + // Initialize database + db, err := database.NewDB("sitemap.db") + if err != nil { + log.Fatal("Failed to initialize database:", err) + } + defer db.Close() + + // Initialize stream manager + streamManager := handlers.NewStreamManager() + + // Initialize handler + h := handlers.NewHandler(db, streamManager) + + // Setup router + r := chi.NewRouter() + + // Middleware + r.Use(middleware.Logger) + r.Use(middleware.Recoverer) + r.Use(middleware.RealIP) + r.Use(cors.Handler(cors.Options{ + AllowedOrigins: []string{"https://*", "http://*"}, + AllowedMethods: []string{"GET", "POST", "PUT", "DELETE", "OPTIONS"}, + AllowedHeaders: []string{"Accept", "Authorization", "Content-Type"}, + ExposedHeaders: []string{"Link"}, + AllowCredentials: true, + MaxAge: 300, + })) + + // Serve static HTML at root + r.Get("/", func(w http.ResponseWriter, r *http.Request) { + http.ServeFile(w, r, "static/index.html") + }) + + // API Routes + r.Post("/generate-sitemap-xml", h.GenerateSitemapXML) + r.Get("/stream/{uuid}", h.StreamSSE) + r.Get("/download/{uuid}", h.DownloadSitemap) + r.Get("/sites", h.GetSites) + r.Get("/sites/{id}", h.GetSite) + r.Delete("/sites/{id}", h.DeleteSite) + r.Get("/health", h.Health) + + // Get port from environment or use default + port := os.Getenv("PORT") + if port == "" { + port = "8080" + } + + log.Printf("Server starting on port %s...", port) + log.Printf("Visit http://localhost:%s to use the sitemap generator", port) + + if err := http.ListenAndServe(":"+port, r); err != nil { + log.Fatal("Server failed to start:", err) + } +} diff --git a/run.sh b/run.sh new file mode 100644 index 0000000..5de6a3d --- /dev/null 
+++ b/run.sh @@ -0,0 +1,44 @@ +#!/bin/bash + +echo "πŸ—ΊοΈ XML Sitemap Generator API" +echo "==============================" +echo "" + +# Check if Go is installed +if ! command -v go &> /dev/null; then + echo "❌ Error: Go is not installed" + echo "Please install Go 1.21+ from https://golang.org/dl/" + exit 1 +fi + +echo "βœ… Go version: $(go version)" +echo "" + +# Install dependencies +echo "πŸ“¦ Installing dependencies..." +go mod download +if [ $? -ne 0 ]; then + echo "❌ Failed to download dependencies" + exit 1 +fi +echo "βœ… Dependencies installed" +echo "" + +# Build the application +echo "πŸ”¨ Building application..." +go build -o sitemap-api . +if [ $? -ne 0 ]; then + echo "❌ Build failed" + exit 1 +fi +echo "βœ… Build successful" +echo "" + +# Run the application +echo "πŸš€ Starting server..." +echo "" +echo "Server will start on http://localhost:8080" +echo "Press Ctrl+C to stop" +echo "" + +./sitemap-api diff --git a/site.go b/site.go new file mode 100644 index 0000000..fcc7d15 --- /dev/null +++ b/site.go @@ -0,0 +1,59 @@ +package models + +import "time" + +type Site struct { + ID int `json:"id"` + UUID string `json:"uuid"` + Domain string `json:"domain"` + URL string `json:"url"` + MaxDepth int `json:"max_depth"` + PageCount int `json:"page_count"` + Status string `json:"status"` // processing, completed, failed + IPAddress string `json:"ip_address"` + UserAgent string `json:"user_agent"` + Browser string `json:"browser"` + BrowserVersion string `json:"browser_version"` + OS string `json:"os"` + DeviceType string `json:"device_type"` + SessionID string `json:"session_id"` + Cookies string `json:"cookies"` + Referrer string `json:"referrer"` + CreatedAt time.Time `json:"created_at"` + CompletedAt *time.Time `json:"completed_at,omitempty"` + LastCrawled *time.Time `json:"last_crawled,omitempty"` +} + +type Page struct { + ID int `json:"id"` + SiteID int `json:"site_id"` + URL string `json:"url"` + Depth int `json:"depth"` + LastModified time.Time 
`json:"last_modified"` + Priority float64 `json:"priority"` + ChangeFreq string `json:"change_freq"` +} + +type Event struct { + Type string `json:"type"` + Data interface{} `json:"data"` +} + +type ProgressData struct { + UUID string `json:"uuid"` + PagesFound int `json:"pages_found"` + Depth int `json:"depth"` + CurrentURL string `json:"current_url"` +} + +type CompleteData struct { + UUID string `json:"uuid"` + PagesFound int `json:"pages_found"` + SiteID int `json:"site_id"` + DownloadURL string `json:"download_url"` +} + +type ErrorData struct { + UUID string `json:"uuid"` + Error string `json:"error"` +}