Files
sitemap-generator-xml-golang/handler.go
2026-02-05 19:13:45 +05:30

466 lines
11 KiB
Go

package handlers
import (
	"encoding/json"
	"encoding/xml"
	"fmt"
	"net"
	"net/http"
	"net/url"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/go-chi/chi/v5"
	"github.com/google/uuid"

	"sitemap-api/crawler"
	"sitemap-api/database"
	"sitemap-api/models"
)
// Handler bundles the dependencies shared by every HTTP endpoint:
// the database, the crawler (constructed from that same DB in
// NewHandler), and the manager for per-crawl SSE event streams.
type Handler struct {
	db            *database.DB     // persistence for sites and crawled pages
	crawler       *crawler.Crawler // background crawler started per request
	streamManager *StreamManager   // fan-out of crawl progress events by UUID
}
// NewHandler wires up a Handler from the given database and stream
// manager; the crawler is built internally on top of the same DB.
func NewHandler(db *database.DB, streamManager *StreamManager) *Handler {
	h := &Handler{db: db, streamManager: streamManager}
	h.crawler = crawler.NewCrawler(db)
	return h
}
// StreamManager handles multiple concurrent SSE connections.
// It maps a crawl UUID to the buffered channel its progress events flow
// through. The RWMutex guards the map itself, not the channels.
type StreamManager struct {
	mu      sync.RWMutex
	streams map[string]chan models.Event
}
// NewStreamManager returns an empty, ready-to-use StreamManager.
func NewStreamManager() *StreamManager {
	sm := new(StreamManager)
	sm.streams = make(map[string]chan models.Event)
	return sm
}
// CreateStream registers a buffered event channel for the given UUID and
// returns it. If a channel was already registered under that UUID it is
// closed first, so any reader still ranging over the stale channel
// terminates instead of blocking forever (previously the old channel was
// silently overwritten and leaked).
func (sm *StreamManager) CreateStream(uuid string) chan models.Event {
	sm.mu.Lock()
	defer sm.mu.Unlock()
	if old, exists := sm.streams[uuid]; exists {
		close(old)
	}
	// Buffer of 100 lets the crawler make progress before a client attaches.
	ch := make(chan models.Event, 100)
	sm.streams[uuid] = ch
	return ch
}
// GetStream looks up the event channel registered for uuid; the boolean
// reports whether such a stream exists.
func (sm *StreamManager) GetStream(uuid string) (chan models.Event, bool) {
	sm.mu.RLock()
	ch, ok := sm.streams[uuid]
	sm.mu.RUnlock()
	return ch, ok
}
// CloseStream closes and unregisters the stream for uuid, if any.
// Closing the channel is the signal that lets SSE readers terminate.
func (sm *StreamManager) CloseStream(uuid string) {
	sm.mu.Lock()
	defer sm.mu.Unlock()
	ch, ok := sm.streams[uuid]
	if !ok {
		return
	}
	delete(sm.streams, uuid)
	close(ch)
}
// GenerateSitemapXML handles POST /generate-sitemap-xml.
//
// It validates the requested URL, records the site together with client
// metadata, starts the crawl in a background goroutine, and immediately
// responds with the server-generated UUID plus the SSE stream URL the
// client can watch for progress.
func (h *Handler) GenerateSitemapXML(w http.ResponseWriter, r *http.Request) {
	var req struct {
		URL      string `json:"url"`
		MaxDepth int    `json:"max_depth"`
	}
	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, "Invalid request body", http.StatusBadRequest)
		return
	}
	if req.URL == "" {
		http.Error(w, "URL is required", http.StatusBadRequest)
		return
	}
	// Only absolute http(s) URLs are crawlable; rejecting every other scheme
	// also keeps the crawler from being pointed at file:// or similar targets.
	parsedURL, err := url.Parse(req.URL)
	if err != nil || parsedURL.Host == "" ||
		(parsedURL.Scheme != "http" && parsedURL.Scheme != "https") {
		http.Error(w, "Invalid URL format", http.StatusBadRequest)
		return
	}
	// Clamp the crawl depth: default 3, hard ceiling 5.
	if req.MaxDepth <= 0 || req.MaxDepth > 5 {
		req.MaxDepth = 3
	}
	// UUID is generated server-side so clients cannot collide or guess IDs.
	generatedUUID := uuid.New().String()

	// Client metadata captured alongside the site record.
	ip := getClientIP(r)
	userAgent := r.Header.Get("User-Agent")
	browser, browserVersion := parseBrowser(userAgent)
	osName := parseOS(userAgent) // "osName", not "os": avoids shadowing the conventional package name
	deviceType := parseDeviceType(userAgent)
	sessionID := getOrCreateSession(r)
	cookies := extractCookies(r)
	referrer := r.Header.Get("Referer")

	site := &models.Site{
		UUID:           generatedUUID,
		Domain:         parsedURL.Host,
		URL:            req.URL,
		MaxDepth:       req.MaxDepth,
		Status:         "processing",
		IPAddress:      ip,
		UserAgent:      userAgent,
		Browser:        browser,
		BrowserVersion: browserVersion,
		OS:             osName,
		DeviceType:     deviceType,
		SessionID:      sessionID,
		Cookies:        cookies,
		Referrer:       referrer,
	}
	siteID, err := h.db.CreateSite(site)
	if err != nil {
		http.Error(w, fmt.Sprintf("Failed to create site: %v", err), http.StatusInternalServerError)
		return
	}

	// Register the SSE stream before the crawl starts so a client can attach
	// as soon as it receives the UUID.
	eventChan := h.streamManager.CreateStream(generatedUUID)
	go func() {
		h.crawler.Crawl(generatedUUID, req.URL, req.MaxDepth, eventChan)
		// Grace period so an attached client can flush the final events
		// before the channel is closed.
		time.Sleep(2 * time.Second)
		h.streamManager.CloseStream(generatedUUID)
	}()

	response := map[string]interface{}{
		"uuid":       generatedUUID,
		"site_id":    siteID,
		"status":     "processing",
		"stream_url": "/stream/" + generatedUUID,
		"message":    "Sitemap generation started",
	}
	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(response)
}
// StreamSSE handles GET /stream/{uuid}.
//
// It attaches the client to the event channel created for that crawl and
// relays each event in Server-Sent Events format until the channel is
// closed or the client disconnects. The original implementation never
// observed client disconnects and kept writing to a dead connection until
// the crawl finished; this version watches the request context instead.
func (h *Handler) StreamSSE(w http.ResponseWriter, r *http.Request) {
	uuid := chi.URLParam(r, "uuid")
	eventChan, exists := h.streamManager.GetStream(uuid)
	if !exists {
		http.Error(w, "Stream not found", http.StatusNotFound)
		return
	}
	// Standard SSE headers; disable caching so proxies pass events through.
	w.Header().Set("Content-Type", "text/event-stream")
	w.Header().Set("Cache-Control", "no-cache")
	w.Header().Set("Connection", "keep-alive")
	w.Header().Set("Access-Control-Allow-Origin", "*")
	flusher, ok := w.(http.Flusher)
	if !ok {
		http.Error(w, "Streaming unsupported", http.StatusInternalServerError)
		return
	}
	// Initial handshake event so the client knows the stream is live.
	connectedData := map[string]string{
		"uuid":    uuid,
		"message": "Connected to stream",
	}
	connectedJSON, _ := json.Marshal(connectedData) // map[string]string cannot fail to marshal
	fmt.Fprintf(w, "event: connected\ndata: %s\n\n", connectedJSON)
	flusher.Flush()

	ctx := r.Context()
	for {
		select {
		case <-ctx.Done():
			// Client is gone: stop writing, but keep draining so a crawler
			// blocked on a full channel can still make progress until the
			// generator goroutine closes the stream.
			for range eventChan {
			}
			return
		case event, open := <-eventChan:
			if !open {
				return
			}
			data, err := json.Marshal(event.Data)
			if err != nil {
				continue // skip unmarshalable payloads rather than abort the stream
			}
			fmt.Fprintf(w, "event: %s\ndata: %s\n\n", event.Type, data)
			flusher.Flush()
		}
	}
}
// DownloadSitemap handles GET /download/{uuid}.
// It renders the stored crawl results for the identified site as an XML
// sitemap and serves it as a file attachment.
func (h *Handler) DownloadSitemap(w http.ResponseWriter, r *http.Request) {
	uuidParam := chi.URLParam(r, "uuid")
	site, err := h.db.GetSiteByUUID(uuidParam)
	if err != nil {
		http.Error(w, "Sitemap not found", http.StatusNotFound)
		return
	}
	pages, err := h.db.GetPagesBySiteID(site.ID)
	if err != nil {
		http.Error(w, "Failed to retrieve pages", http.StatusInternalServerError)
		return
	}
	body := generateXMLSitemap(pages)
	// e.g. "example.com" -> "sitemap-example-com.xml"
	filename := fmt.Sprintf("sitemap-%s.xml", strings.ReplaceAll(site.Domain, ".", "-"))
	headers := w.Header()
	headers.Set("Content-Type", "application/xml; charset=utf-8")
	headers.Set("Content-Disposition", fmt.Sprintf("attachment; filename=\"%s\"", filename))
	headers.Set("X-Generated-At", time.Now().Format(time.RFC3339))
	// Prepend the standard XML declaration before the <urlset> document.
	w.Write([]byte(xml.Header + body))
}
// GetSites handles GET /sites and returns every stored site as JSON.
func (h *Handler) GetSites(w http.ResponseWriter, r *http.Request) {
	sites, err := h.db.GetAllSites()
	if err != nil {
		http.Error(w, "Failed to retrieve sites", http.StatusInternalServerError)
		return
	}
	w.Header().Set("Content-Type", "application/json")
	enc := json.NewEncoder(w)
	enc.Encode(sites)
}
// GetSite handles GET /sites/{id} and returns one site as JSON.
func (h *Handler) GetSite(w http.ResponseWriter, r *http.Request) {
	id, err := strconv.Atoi(chi.URLParam(r, "id"))
	if err != nil {
		http.Error(w, "Invalid site ID", http.StatusBadRequest)
		return
	}
	site, lookupErr := h.db.GetSiteByID(id)
	if lookupErr != nil {
		http.Error(w, "Site not found", http.StatusNotFound)
		return
	}
	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(site)
}
// DeleteSite handles DELETE /sites/{id} and reports success as JSON.
func (h *Handler) DeleteSite(w http.ResponseWriter, r *http.Request) {
	id, err := strconv.Atoi(chi.URLParam(r, "id"))
	if err != nil {
		http.Error(w, "Invalid site ID", http.StatusBadRequest)
		return
	}
	if deleteErr := h.db.DeleteSite(id); deleteErr != nil {
		http.Error(w, "Failed to delete site", http.StatusInternalServerError)
		return
	}
	result := map[string]interface{}{
		"success": true,
		"message": "Site deleted successfully",
	}
	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(result)
}
// Health handles GET /health and reports liveness with a timestamp.
func (h *Handler) Health(w http.ResponseWriter, r *http.Request) {
	payload := map[string]string{
		"status": "healthy",
		"time":   time.Now().Format(time.RFC3339),
	}
	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(payload)
}
// Helper functions
func getClientIP(r *http.Request) string {
// Check X-Forwarded-For header first
forwarded := r.Header.Get("X-Forwarded-For")
if forwarded != "" {
// Get first IP if multiple
ips := strings.Split(forwarded, ",")
return strings.TrimSpace(ips[0])
}
// Check X-Real-IP header
realIP := r.Header.Get("X-Real-IP")
if realIP != "" {
return realIP
}
// Fallback to RemoteAddr
ip := r.RemoteAddr
if strings.Contains(ip, ":") {
ip = strings.Split(ip, ":")[0]
}
return ip
}
// parseBrowser identifies the browser family and version from a User-Agent
// string. Detection must run in a fixed order because real UAs overlap:
// an Edge UA also contains "chrome" and "safari", and a Chrome UA also
// contains "safari". The previous implementation iterated a Go map, whose
// iteration order is random, so the result was nondeterministic.
func parseBrowser(userAgent string) (string, string) {
	ua := strings.ToLower(userAgent)
	// Most specific tokens first; "opr" covers modern Opera builds.
	signatures := []struct {
		token string
		name  string
	}{
		{"edg", "Edge"},
		{"opr", "Opera"},
		{"opera", "Opera"},
		{"firefox", "Firefox"},
		{"chrome", "Chrome"},
		{"safari", "Safari"},
	}
	for _, sig := range signatures {
		if strings.Contains(ua, sig.token) {
			return sig.name, extractVersion(ua, sig.token)
		}
	}
	return "Unknown", ""
}

// extractVersion pulls the dotted numeric version that follows a browser
// token in a lowercased UA, e.g. "chrome/120.0.1" -> "120.0.1".
// Returns "" when the token is absent or no version follows it.
func extractVersion(ua, browser string) string {
	idx := strings.Index(ua, browser)
	if idx == -1 {
		return ""
	}
	pos := idx + len(browser)
	if pos >= len(ua) {
		return ""
	}
	// Skip the token/version separators ('/' or spaces).
	for pos < len(ua) && (ua[pos] == '/' || ua[pos] == ' ') {
		pos++
	}
	end := pos
	for end < len(ua) && (ua[end] == '.' || (ua[end] >= '0' && ua[end] <= '9')) {
		end++
	}
	return ua[pos:end]
}
// parseOS identifies the operating system from a User-Agent string.
// Order matters: iPhone/iPad UAs also contain "like Mac OS X", and
// Android UAs also contain "Linux", so the mobile tokens must be checked
// before the desktop ones. The previous ordering checked "mac os x"
// first, which misreported all iOS devices as macOS.
func parseOS(userAgent string) string {
	ua := strings.ToLower(userAgent)
	oses := []struct {
		keyword string
		name    string
	}{
		{"android", "Android"},
		{"iphone", "iOS"},
		{"ipad", "iOS"},
		{"windows nt 10", "Windows 10"},
		{"windows nt 11", "Windows 11"},
		{"mac os x", "macOS"},
		{"linux", "Linux"},
	}
	for _, entry := range oses {
		if strings.Contains(ua, entry.keyword) {
			return entry.name
		}
	}
	return "Unknown"
}
// parseDeviceType classifies the client as Mobile, Tablet, or Desktop.
// Tablets are checked first: iPad Safari UAs also contain the token
// "mobile" (e.g. "... Mobile/15E148 Safari/..."), so the previous
// mobile-first ordering misclassified iPads as Mobile.
func parseDeviceType(userAgent string) string {
	ua := strings.ToLower(userAgent)
	if strings.Contains(ua, "tablet") || strings.Contains(ua, "ipad") {
		return "Tablet"
	}
	if strings.Contains(ua, "mobile") || strings.Contains(ua, "android") || strings.Contains(ua, "iphone") {
		return "Mobile"
	}
	return "Desktop"
}
// getOrCreateSession returns the caller's session ID from the
// "session_id" cookie, minting a fresh UUID when none is present.
// Note: the new ID is only returned, not set as a cookie on the response.
func getOrCreateSession(r *http.Request) string {
	if cookie, err := r.Cookie("session_id"); err == nil && cookie.Value != "" {
		return cookie.Value
	}
	return uuid.New().String()
}
func extractCookies(r *http.Request) string {
cookies := r.Cookies()
if len(cookies) == 0 {
return ""
}
cookieData := make(map[string]string)
for _, cookie := range cookies {
cookieData[cookie.Name] = cookie.Value
}
data, _ := json.Marshal(cookieData)
return string(data)
}
// generateXMLSitemap renders the crawled pages as a sitemaps.org <urlset>
// document (without the XML declaration — the caller prepends xml.Header).
// ChangeFreq is now escaped like URL: previously it was interpolated raw,
// which would produce malformed XML if a stored value ever contained
// markup characters.
func generateXMLSitemap(pages []*models.Page) string {
	var sb strings.Builder
	sb.WriteString("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n")
	for _, page := range pages {
		sb.WriteString(" <url>\n")
		sb.WriteString(fmt.Sprintf(" <loc>%s</loc>\n", xmlEscape(page.URL)))
		// NOTE(review): a zero LastModified renders as "0001-01-01" — confirm
		// upstream always populates it before relying on <lastmod>.
		sb.WriteString(fmt.Sprintf(" <lastmod>%s</lastmod>\n", page.LastModified.Format("2006-01-02")))
		sb.WriteString(fmt.Sprintf(" <changefreq>%s</changefreq>\n", xmlEscape(page.ChangeFreq)))
		sb.WriteString(fmt.Sprintf(" <priority>%.1f</priority>\n", page.Priority))
		sb.WriteString(" </url>\n")
	}
	sb.WriteString("</urlset>")
	return sb.String()
}
// xmlEscape returns s with the five XML special characters replaced by
// their entity references, making it safe for element content.
// A single-pass Replacer is equivalent to the sequential ReplaceAll chain
// (with "&" first) because neither rescans its own replacement output.
func xmlEscape(s string) string {
	replacer := strings.NewReplacer(
		"&", "&amp;",
		"<", "&lt;",
		">", "&gt;",
		"\"", "&quot;",
		"'", "&apos;",
	)
	return replacer.Replace(s)
}