package handlers

import (
	"encoding/json"
	"encoding/xml"
	"fmt"
	"net"
	"net/http"
	"net/url"
	"strconv"
	"strings"
	"sync"
	"time"

	"sitemap-api/crawler"
	"sitemap-api/database"
	"sitemap-api/models"

	"github.com/go-chi/chi/v5"
	"github.com/google/uuid"
)

type Handler struct {
	db            *database.DB
	crawler       *crawler.Crawler
	streamManager *StreamManager
}

func NewHandler(db *database.DB, streamManager *StreamManager) *Handler {
	return &Handler{
		db:            db,
		crawler:       crawler.NewCrawler(db),
		streamManager: streamManager,
	}
}

// StreamManager handles multiple concurrent SSE connections
type StreamManager struct {
	mu      sync.RWMutex
	streams map[string]chan models.Event
}

func NewStreamManager() *StreamManager {
	return &StreamManager{
		streams: make(map[string]chan models.Event),
	}
}

func (sm *StreamManager) CreateStream(uuid string) chan models.Event {
	sm.mu.Lock()
	defer sm.mu.Unlock()

	ch := make(chan models.Event, 100)
	sm.streams[uuid] = ch
	return ch
}

func (sm *StreamManager) GetStream(uuid string) (chan models.Event, bool) {
	sm.mu.RLock()
	defer sm.mu.RUnlock()

	ch, exists := sm.streams[uuid]
	return ch, exists
}

func (sm *StreamManager) CloseStream(uuid string) {
	sm.mu.Lock()
	defer sm.mu.Unlock()

	if ch, exists := sm.streams[uuid]; exists {
		close(ch)
		delete(sm.streams, uuid)
	}
}

// GenerateSitemapXML handles POST /generate-sitemap-xml
func (h *Handler) GenerateSitemapXML(w http.ResponseWriter, r *http.Request) {
	var req struct {
		URL      string `json:"url"`
		MaxDepth int    `json:"max_depth"`
	}

	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, "Invalid request body", http.StatusBadRequest)
		return
	}

	// Validate URL
	if req.URL == "" {
		http.Error(w, "URL is required", http.StatusBadRequest)
		return
	}

	parsedURL, err := url.Parse(req.URL)
	if err != nil || parsedURL.Scheme == "" || parsedURL.Host == "" {
		http.Error(w, "Invalid URL format", http.StatusBadRequest)
		return
	}

	// Default and clamp max depth to the 1-5 range
	if req.MaxDepth <= 0 || req.MaxDepth > 5 {
		req.MaxDepth = 5
	}

	// Generate UUID server-side
	generatedUUID := uuid.New().String()

	// Extract client metadata
	ip := getClientIP(r)
	userAgent := r.Header.Get("User-Agent")
	browser, browserVersion := parseBrowser(userAgent)
	os := parseOS(userAgent)
	deviceType := parseDeviceType(userAgent)
	sessionID := getOrCreateSession(r)
	cookies := extractCookies(r)
	referrer := r.Header.Get("Referer")

	// Extract domain from URL
	domain := parsedURL.Host

	// Create site record
	site := &models.Site{
		UUID:           generatedUUID,
		Domain:         domain,
		URL:            req.URL,
		MaxDepth:       req.MaxDepth,
		Status:         "processing",
		IPAddress:      ip,
		UserAgent:      userAgent,
		Browser:        browser,
		BrowserVersion: browserVersion,
		OS:             os,
		DeviceType:     deviceType,
		SessionID:      sessionID,
		Cookies:        cookies,
		Referrer:       referrer,
	}

	siteID, err := h.db.CreateSite(site)
	if err != nil {
		http.Error(w, fmt.Sprintf("Failed to create site: %v", err), http.StatusInternalServerError)
		return
	}

	// Create SSE stream for this UUID
	eventChan := h.streamManager.CreateStream(generatedUUID)

	// Start crawling in background (non-blocking)
	go func() {
		h.crawler.Crawl(generatedUUID, req.URL, req.MaxDepth, eventChan)
		// Close stream after crawl completes
		time.Sleep(2 * time.Second) // Give time for final events to be sent
		h.streamManager.CloseStream(generatedUUID)
	}()

	// Add UUID to user UUIDs cookie
	userUUIDs := getUserUUIDsFromCookie(r)
	userUUIDs = append(userUUIDs, generatedUUID)

	// Keep only last 20 UUIDs and remove duplicates
	uniqueUUIDs := removeDuplicateUUIDs(userUUIDs)
	if len(uniqueUUIDs) > 20 {
		uniqueUUIDs = uniqueUUIDs[len(uniqueUUIDs)-20:]
	}

	// Set cookie with user UUIDs
	http.SetCookie(w, &http.Cookie{
		Name:     "user_uuids",
		Value:    strings.Join(uniqueUUIDs, ","),
		Path:     "/",
		MaxAge:   86400 * 90, // 90 days
		HttpOnly: false,
		SameSite: http.SameSiteLaxMode,
	})

	// Return immediately with UUID
	response := map[string]interface{}{
		"uuid":       generatedUUID,
		"site_id":    siteID,
		"status":     "processing",
		"stream_url": "/stream/" + generatedUUID,
		"message":    "Sitemap generation started",
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(response)
}
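// Illustrative request/response exchange for the handler above. The values
// are made up for the example; field names follow the code:
//
//	POST /generate-sitemap-xml
//	{"url": "https://example.com", "max_depth": 3}
//
//	HTTP/1.1 200 OK
//	{
//	  "uuid": "<generated-uuid>",
//	  "site_id": 42,
//	  "status": "processing",
//	  "stream_url": "/stream/<generated-uuid>",
//	  "message": "Sitemap generation started"
//	}
//
// The client is expected to open the returned stream_url with an SSE client
// to follow crawl progress while the crawl runs in the background goroutine.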
// StreamSSE handles GET /stream/{uuid}
func (h *Handler) StreamSSE(w http.ResponseWriter, r *http.Request) {
	uuid := chi.URLParam(r, "uuid")

	// Get event channel for this UUID
	eventChan, exists := h.streamManager.GetStream(uuid)
	if !exists {
		http.Error(w, "Stream not found", http.StatusNotFound)
		return
	}

	// Set SSE headers
	w.Header().Set("Content-Type", "text/event-stream")
	w.Header().Set("Cache-Control", "no-cache")
	w.Header().Set("Connection", "keep-alive")
	w.Header().Set("Access-Control-Allow-Origin", "*")

	flusher, ok := w.(http.Flusher)
	if !ok {
		http.Error(w, "Streaming unsupported", http.StatusInternalServerError)
		return
	}

	// Send connected event
	connectedData := map[string]string{
		"uuid":    uuid,
		"message": "Connected to stream",
	}
	connectedJSON, _ := json.Marshal(connectedData)
	fmt.Fprintf(w, "event: connected\ndata: %s\n\n", connectedJSON)
	flusher.Flush()

	// Stream events
	for event := range eventChan {
		data, err := json.Marshal(event.Data)
		if err != nil {
			continue
		}
		fmt.Fprintf(w, "event: %s\ndata: %s\n\n", event.Type, data)
		flusher.Flush()
	}
}
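// What the stream looks like on the wire (a sketch; the concrete event.Type
// values are produced by the crawler package and are not defined in this file):
//
//	event: connected
//	data: {"uuid":"<uuid>","message":"Connected to stream"}
//
//	event: <event.Type>
//	data: <event.Data marshalled as JSON>
//
// Each frame is flushed immediately, so clients such as the browser
// EventSource API receive progress events as the crawl proceeds, until the
// crawler closes the channel and the loop above returns.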
// DownloadSitemap handles GET /download/{uuid}
func (h *Handler) DownloadSitemap(w http.ResponseWriter, r *http.Request) {
	uuidParam := chi.URLParam(r, "uuid")

	// Get site by UUID
	site, err := h.db.GetSiteByUUID(uuidParam)
	if err != nil {
		http.Error(w, "Sitemap not found", http.StatusNotFound)
		return
	}

	// Get all pages for this site
	pages, err := h.db.GetPagesBySiteID(site.ID)
	if err != nil {
		http.Error(w, "Failed to retrieve pages", http.StatusInternalServerError)
		return
	}

	// Generate XML sitemap
	sitemap := generateXMLSitemap(pages)

	// Set headers
	filename := fmt.Sprintf("sitemap-%s.xml", strings.ReplaceAll(site.Domain, ".", "-"))
	w.Header().Set("Content-Type", "application/xml; charset=utf-8")
	w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=\"%s\"", filename))
	w.Header().Set("X-Generated-At", time.Now().Format(time.RFC3339))

	// Write XML
	w.Write([]byte(xml.Header))
	w.Write([]byte(sitemap))
}

// GetSites handles GET /sites
func (h *Handler) GetSites(w http.ResponseWriter, r *http.Request) {
	// Get user's UUIDs from cookie
	userUUIDs := getUserUUIDsFromCookie(r)

	var sites []*models.Site
	var err error

	if len(userUUIDs) > 0 {
		// Get only user's sites
		sites, err = h.db.GetSitesByUUIDs(userUUIDs)
	} else {
		// No UUIDs found, return empty list
		sites = []*models.Site{}
	}

	if err != nil {
		http.Error(w, "Failed to retrieve sites", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(sites)
}

// GetSite handles GET /sites/{id}
func (h *Handler) GetSite(w http.ResponseWriter, r *http.Request) {
	idParam := chi.URLParam(r, "id")
	id, err := strconv.Atoi(idParam)
	if err != nil {
		http.Error(w, "Invalid site ID", http.StatusBadRequest)
		return
	}

	site, err := h.db.GetSiteByID(id)
	if err != nil {
		http.Error(w, "Site not found", http.StatusNotFound)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(site)
}

// DeleteSite handles DELETE /sites/{id}
func (h *Handler) DeleteSite(w http.ResponseWriter, r *http.Request) {
	idParam := chi.URLParam(r, "id")
	id, err := strconv.Atoi(idParam)
	if err != nil {
		http.Error(w, "Invalid site ID", http.StatusBadRequest)
		return
	}

	if err := h.db.DeleteSite(id); err != nil {
		http.Error(w, "Failed to delete site", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]interface{}{
		"success": true,
		"message": "Site deleted successfully",
	})
}

// Health handles GET /health
func (h *Handler) Health(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]string{
		"status": "healthy",
		"time":   time.Now().Format(time.RFC3339),
	})
}

// Helper functions

func getClientIP(r *http.Request) string {
	// Check X-Forwarded-For header first
	forwarded := r.Header.Get("X-Forwarded-For")
	if forwarded != "" {
		// Get first IP if multiple
		ips := strings.Split(forwarded, ",")
		return strings.TrimSpace(ips[0])
	}

	// Check X-Real-IP header
	realIP := r.Header.Get("X-Real-IP")
	if realIP != "" {
		return realIP
	}

	// Fallback to RemoteAddr; strip the port without mangling IPv6 literals
	if host, _, err := net.SplitHostPort(r.RemoteAddr); err == nil {
		return host
	}
	return r.RemoteAddr
}

func parseBrowser(userAgent string) (string, string) {
	ua := strings.ToLower(userAgent)

	// Check in a fixed order: Chrome UAs also contain "safari", and Edge UAs
	// contain both "chrome" and "safari", so the more specific tokens must win.
	browsers := []struct {
		keyword string
		name    string
	}{
		{"edg", "Edge"},
		{"opera", "Opera"},
		{"firefox", "Firefox"},
		{"chrome", "Chrome"},
		{"safari", "Safari"},
	}

	for _, b := range browsers {
		if strings.Contains(ua, b.keyword) {
			// Extract version
			version := extractVersion(ua, b.keyword)
			return b.name, version
		}
	}

	return "Unknown", ""
}

func extractVersion(ua, browser string) string {
	idx := strings.Index(ua, browser)
	if idx == -1 {
		return ""
	}

	versionStart := idx + len(browser)
	if versionStart >= len(ua) {
		return ""
	}

	// Skip forward to version number
	for versionStart < len(ua) && (ua[versionStart] == '/' || ua[versionStart] == ' ') {
		versionStart++
	}

	versionEnd := versionStart
	for versionEnd < len(ua) && (ua[versionEnd] >= '0' && ua[versionEnd] <= '9' || ua[versionEnd] == '.') {
		versionEnd++
	}

	return ua[versionStart:versionEnd]
}

func parseOS(userAgent string) string {
	ua := strings.ToLower(userAgent)

	oses := []struct {
		keyword string
		name    string
	}{
		{"windows nt 10", "Windows 10"},
		{"windows nt 11", "Windows 11"},
		{"mac os x", "macOS"},
		{"android", "Android"},
		{"iphone", "iOS"},
		{"ipad", "iOS"},
		{"linux", "Linux"},
	}

	for _, os := range oses {
		if strings.Contains(ua, os.keyword) {
			return os.name
		}
	}

	return "Unknown"
}

func parseDeviceType(userAgent string) string {
	ua := strings.ToLower(userAgent)

	if strings.Contains(ua, "mobile") || strings.Contains(ua, "android") || strings.Contains(ua, "iphone") {
		return "Mobile"
	}
	if strings.Contains(ua, "tablet") || strings.Contains(ua, "ipad") {
		return "Tablet"
	}

	return "Desktop"
}

func getOrCreateSession(r *http.Request) string {
	// Try to get existing session from cookie
	cookie, err := r.Cookie("session_id")
	if err == nil && cookie.Value != "" {
		return cookie.Value
	}

	// Generate new session ID
	return uuid.New().String()
}

func getUserUUIDsFromCookie(r *http.Request) []string {
	// Get user UUIDs from cookie
	cookie, err := r.Cookie("user_uuids")
	if err != nil || cookie.Value == "" {
		return []string{}
	}

	// Parse comma-separated UUIDs
	uuids := strings.Split(cookie.Value, ",")

	// Filter out empty strings
	var result []string
	for _, uuid := range uuids {
		if strings.TrimSpace(uuid) != "" {
			result = append(result, strings.TrimSpace(uuid))
		}
	}

	return result
}
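// The user_uuids cookie read above (and written back in GenerateSitemapXML)
// is a plain comma-separated list, e.g.
//
//	user_uuids=<uuid-1>,<uuid-2>,<uuid-3>
//
// which this helper parses into a []string. The write side deduplicates and
// keeps only the 20 most recent UUIDs.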
func removeDuplicateUUIDs(uuids []string) []string {
	seen := make(map[string]bool)
	var result []string

	for _, uuid := range uuids {
		if !seen[uuid] {
			seen[uuid] = true
			result = append(result, uuid)
		}
	}

	return result
}

func extractCookies(r *http.Request) string {
	cookies := r.Cookies()
	if len(cookies) == 0 {
		return ""
	}

	cookieData := make(map[string]string)
	for _, cookie := range cookies {
		cookieData[cookie.Name] = cookie.Value
	}

	data, _ := json.Marshal(cookieData)
	return string(data)
}

func generateXMLSitemap(pages []*models.Page) string {
	var sb strings.Builder

	sb.WriteString("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n")

	for _, page := range pages {
		sb.WriteString("  <url>\n")
		sb.WriteString(fmt.Sprintf("    <loc>%s</loc>\n", xmlEscape(page.URL)))
		sb.WriteString(fmt.Sprintf("    <lastmod>%s</lastmod>\n", page.LastModified.Format("2006-01-02")))
		sb.WriteString(fmt.Sprintf("    <changefreq>%s</changefreq>\n", page.ChangeFreq))
		sb.WriteString(fmt.Sprintf("    <priority>%.2f</priority>\n", page.Priority))
		sb.WriteString("  </url>\n")
	}

	sb.WriteString("</urlset>")

	return sb.String()
}

func xmlEscape(s string) string {
	s = strings.ReplaceAll(s, "&", "&amp;")
	s = strings.ReplaceAll(s, "<", "&lt;")
	s = strings.ReplaceAll(s, ">", "&gt;")
	s = strings.ReplaceAll(s, "\"", "&quot;")
	s = strings.ReplaceAll(s, "'", "&apos;")
	return s
}
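// newRouter is an illustrative sketch of how the handlers above map onto chi
// routes, derived from the "handles METHOD /path" comments in this file. It is
// an assumption added for documentation; the project's real router setup
// (typically in package main) is authoritative.
func newRouter(h *Handler) http.Handler {
	r := chi.NewRouter()
	r.Post("/generate-sitemap-xml", h.GenerateSitemapXML)
	r.Get("/stream/{uuid}", h.StreamSSE)
	r.Get("/download/{uuid}", h.DownloadSitemap)
	r.Get("/sites", h.GetSites)
	r.Get("/sites/{id}", h.GetSite)
	r.Delete("/sites/{id}", h.DeleteSite)
	r.Get("/health", h.Health)
	return r
}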