532 lines
12 KiB
Go
532 lines
12 KiB
Go
package handlers
|
|
|
|
import (
|
|
"encoding/json"
|
|
"encoding/xml"
|
|
"fmt"
|
|
"net/http"
|
|
"net/url"
|
|
"sitemap-api/crawler"
|
|
"sitemap-api/database"
|
|
"sitemap-api/models"
|
|
"strconv"
|
|
"strings"
|
|
"sync"
|
|
"time"
|
|
|
|
"github.com/go-chi/chi/v5"
|
|
"github.com/google/uuid"
|
|
)
|
|
|
|
// Handler bundles the dependencies shared by every HTTP endpoint in this
// package: database access, the crawler that performs sitemap generation,
// and the registry of per-UUID SSE event channels.
type Handler struct {
	db            *database.DB
	crawler       *crawler.Crawler
	streamManager *StreamManager // one event channel per in-flight crawl UUID
}
|
|
|
|
func NewHandler(db *database.DB, streamManager *StreamManager) *Handler {
|
|
return &Handler{
|
|
db: db,
|
|
crawler: crawler.NewCrawler(db),
|
|
streamManager: streamManager,
|
|
}
|
|
}
|
|
|
|
// StreamManager handles multiple concurrent SSE connections
type StreamManager struct {
	mu      sync.RWMutex // guards streams
	streams map[string]chan models.Event // crawl-event channel keyed by site UUID
}
|
|
|
|
func NewStreamManager() *StreamManager {
|
|
return &StreamManager{
|
|
streams: make(map[string]chan models.Event),
|
|
}
|
|
}
|
|
|
|
func (sm *StreamManager) CreateStream(uuid string) chan models.Event {
|
|
sm.mu.Lock()
|
|
defer sm.mu.Unlock()
|
|
|
|
ch := make(chan models.Event, 100)
|
|
sm.streams[uuid] = ch
|
|
return ch
|
|
}
|
|
|
|
func (sm *StreamManager) GetStream(uuid string) (chan models.Event, bool) {
|
|
sm.mu.RLock()
|
|
defer sm.mu.RUnlock()
|
|
|
|
ch, exists := sm.streams[uuid]
|
|
return ch, exists
|
|
}
|
|
|
|
func (sm *StreamManager) CloseStream(uuid string) {
|
|
sm.mu.Lock()
|
|
defer sm.mu.Unlock()
|
|
|
|
if ch, exists := sm.streams[uuid]; exists {
|
|
close(ch)
|
|
delete(sm.streams, uuid)
|
|
}
|
|
}
|
|
|
|
// GenerateSitemapXML handles POST /generate-sitemap-xml
//
// It validates the requested URL, persists a "processing" site row together
// with best-effort client metadata, starts the crawl in a background
// goroutine, records the new UUID in the caller's "user_uuids" cookie, and
// returns immediately with the UUID plus the SSE URL to follow progress.
func (h *Handler) GenerateSitemapXML(w http.ResponseWriter, r *http.Request) {
	var req struct {
		URL      string `json:"url"`
		MaxDepth int    `json:"max_depth"`
	}

	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, "Invalid request body", http.StatusBadRequest)
		return
	}

	// Validate URL: must be present and absolute (scheme + host).
	if req.URL == "" {
		http.Error(w, "URL is required", http.StatusBadRequest)
		return
	}

	parsedURL, err := url.Parse(req.URL)
	if err != nil || parsedURL.Scheme == "" || parsedURL.Host == "" {
		http.Error(w, "Invalid URL format", http.StatusBadRequest)
		return
	}

	// Set default max depth: unset/zero, negative, and too-large values are
	// all clamped to the maximum of 5.
	if req.MaxDepth <= 0 || req.MaxDepth > 5 {
		req.MaxDepth = 5
	}

	// Generate UUID server-side so clients cannot choose or collide IDs.
	generatedUUID := uuid.New().String()

	// Extract client metadata (UA sniffing is heuristic, stored for analytics).
	ip := getClientIP(r)
	userAgent := r.Header.Get("User-Agent")
	browser, browserVersion := parseBrowser(userAgent)
	os := parseOS(userAgent)
	deviceType := parseDeviceType(userAgent)
	sessionID := getOrCreateSession(r)
	cookies := extractCookies(r)
	referrer := r.Header.Get("Referer")

	// Extract domain from URL
	domain := parsedURL.Host

	// Create site record in "processing" state; status is presumably updated
	// by the crawler when it finishes — confirm in crawler package.
	site := &models.Site{
		UUID:           generatedUUID,
		Domain:         domain,
		URL:            req.URL,
		MaxDepth:       req.MaxDepth,
		Status:         "processing",
		IPAddress:      ip,
		UserAgent:      userAgent,
		Browser:        browser,
		BrowserVersion: browserVersion,
		OS:             os,
		DeviceType:     deviceType,
		SessionID:      sessionID,
		Cookies:        cookies,
		Referrer:       referrer,
	}

	siteID, err := h.db.CreateSite(site)
	if err != nil {
		http.Error(w, fmt.Sprintf("Failed to create site: %v", err), http.StatusInternalServerError)
		return
	}

	// Create SSE stream for this UUID before the crawl starts so no events
	// are emitted without a registered channel.
	eventChan := h.streamManager.CreateStream(generatedUUID)

	// Start crawling in background (non-blocking).
	go func() {
		h.crawler.Crawl(generatedUUID, req.URL, req.MaxDepth, eventChan)
		// Close stream after crawl completes.
		// NOTE(review): the fixed 2s grace period is a heuristic — a slow SSE
		// client may still miss final events; confirm acceptable.
		time.Sleep(2 * time.Second) // Give time for final events to be sent
		h.streamManager.CloseStream(generatedUUID)
	}()

	// Add UUID to user UUIDs cookie so GetSites can list this user's crawls.
	userUUIDs := getUserUUIDsFromCookie(r)
	userUUIDs = append(userUUIDs, generatedUUID)

	// Keep only last 20 UUIDs and remove duplicates.
	uniqueUUIDs := removeDuplicateUUIDs(userUUIDs)
	if len(uniqueUUIDs) > 20 {
		uniqueUUIDs = uniqueUUIDs[len(uniqueUUIDs)-20:]
	}

	// Set cookie with user UUIDs. HttpOnly is false so frontend JS can read it.
	http.SetCookie(w, &http.Cookie{
		Name:     "user_uuids",
		Value:    strings.Join(uniqueUUIDs, ","),
		Path:     "/",
		MaxAge:   86400 * 90, // 90 days
		HttpOnly: false,
		SameSite: http.SameSiteLaxMode,
	})

	// Return immediately with UUID; the crawl continues in the background.
	response := map[string]interface{}{
		"uuid":       generatedUUID,
		"site_id":    siteID,
		"status":     "processing",
		"stream_url": "/stream/" + generatedUUID,
		"message":    "Sitemap generation started",
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(response)
}
|
|
|
|
// StreamSSE handles GET /stream/{uuid}
|
|
func (h *Handler) StreamSSE(w http.ResponseWriter, r *http.Request) {
|
|
uuid := chi.URLParam(r, "uuid")
|
|
|
|
// Get event channel for this UUID
|
|
eventChan, exists := h.streamManager.GetStream(uuid)
|
|
if !exists {
|
|
http.Error(w, "Stream not found", http.StatusNotFound)
|
|
return
|
|
}
|
|
|
|
// Set SSE headers
|
|
w.Header().Set("Content-Type", "text/event-stream")
|
|
w.Header().Set("Cache-Control", "no-cache")
|
|
w.Header().Set("Connection", "keep-alive")
|
|
w.Header().Set("Access-Control-Allow-Origin", "*")
|
|
|
|
flusher, ok := w.(http.Flusher)
|
|
if !ok {
|
|
http.Error(w, "Streaming unsupported", http.StatusInternalServerError)
|
|
return
|
|
}
|
|
|
|
// Send connected event
|
|
connectedData := map[string]string{
|
|
"uuid": uuid,
|
|
"message": "Connected to stream",
|
|
}
|
|
connectedJSON, _ := json.Marshal(connectedData)
|
|
fmt.Fprintf(w, "event: connected\ndata: %s\n\n", connectedJSON)
|
|
flusher.Flush()
|
|
|
|
// Stream events
|
|
for event := range eventChan {
|
|
data, err := json.Marshal(event.Data)
|
|
if err != nil {
|
|
continue
|
|
}
|
|
fmt.Fprintf(w, "event: %s\ndata: %s\n\n", event.Type, data)
|
|
flusher.Flush()
|
|
}
|
|
}
|
|
|
|
// DownloadSitemap handles GET /download/{uuid}
|
|
func (h *Handler) DownloadSitemap(w http.ResponseWriter, r *http.Request) {
|
|
uuidParam := chi.URLParam(r, "uuid")
|
|
|
|
// Get site by UUID
|
|
site, err := h.db.GetSiteByUUID(uuidParam)
|
|
if err != nil {
|
|
http.Error(w, "Sitemap not found", http.StatusNotFound)
|
|
return
|
|
}
|
|
|
|
// Get all pages for this site
|
|
pages, err := h.db.GetPagesBySiteID(site.ID)
|
|
if err != nil {
|
|
http.Error(w, "Failed to retrieve pages", http.StatusInternalServerError)
|
|
return
|
|
}
|
|
|
|
// Generate XML sitemap
|
|
sitemap := generateXMLSitemap(pages)
|
|
|
|
// Set headers
|
|
filename := fmt.Sprintf("sitemap-%s.xml", strings.ReplaceAll(site.Domain, ".", "-"))
|
|
w.Header().Set("Content-Type", "application/xml; charset=utf-8")
|
|
w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=\"%s\"", filename))
|
|
w.Header().Set("X-Generated-At", time.Now().Format(time.RFC3339))
|
|
|
|
// Write XML
|
|
w.Write([]byte(xml.Header))
|
|
w.Write([]byte(sitemap))
|
|
}
|
|
|
|
// GetSites handles GET /sites
|
|
func (h *Handler) GetSites(w http.ResponseWriter, r *http.Request) {
|
|
// Get user's UUIDs from cookie
|
|
userUUIDs := getUserUUIDsFromCookie(r)
|
|
|
|
var sites []*models.Site
|
|
var err error
|
|
|
|
if len(userUUIDs) > 0 {
|
|
// Get only user's sites
|
|
sites, err = h.db.GetSitesByUUIDs(userUUIDs)
|
|
} else {
|
|
// No UUIDs found, return empty list
|
|
sites = []*models.Site{}
|
|
}
|
|
|
|
if err != nil {
|
|
http.Error(w, "Failed to retrieve sites", http.StatusInternalServerError)
|
|
return
|
|
}
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(sites)
|
|
}
|
|
|
|
// GetSite handles GET /sites/{id}
|
|
func (h *Handler) GetSite(w http.ResponseWriter, r *http.Request) {
|
|
idParam := chi.URLParam(r, "id")
|
|
id, err := strconv.Atoi(idParam)
|
|
if err != nil {
|
|
http.Error(w, "Invalid site ID", http.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
site, err := h.db.GetSiteByID(id)
|
|
if err != nil {
|
|
http.Error(w, "Site not found", http.StatusNotFound)
|
|
return
|
|
}
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(site)
|
|
}
|
|
|
|
// DeleteSite handles DELETE /sites/{id}
|
|
func (h *Handler) DeleteSite(w http.ResponseWriter, r *http.Request) {
|
|
idParam := chi.URLParam(r, "id")
|
|
id, err := strconv.Atoi(idParam)
|
|
if err != nil {
|
|
http.Error(w, "Invalid site ID", http.StatusBadRequest)
|
|
return
|
|
}
|
|
|
|
if err := h.db.DeleteSite(id); err != nil {
|
|
http.Error(w, "Failed to delete site", http.StatusInternalServerError)
|
|
return
|
|
}
|
|
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(map[string]interface{}{
|
|
"success": true,
|
|
"message": "Site deleted successfully",
|
|
})
|
|
}
|
|
|
|
// Health handles GET /health
|
|
func (h *Handler) Health(w http.ResponseWriter, r *http.Request) {
|
|
w.Header().Set("Content-Type", "application/json")
|
|
json.NewEncoder(w).Encode(map[string]string{
|
|
"status": "healthy",
|
|
"time": time.Now().Format(time.RFC3339),
|
|
})
|
|
}
|
|
|
|
// Helper functions
|
|
|
|
func getClientIP(r *http.Request) string {
|
|
// Check X-Forwarded-For header first
|
|
forwarded := r.Header.Get("X-Forwarded-For")
|
|
if forwarded != "" {
|
|
// Get first IP if multiple
|
|
ips := strings.Split(forwarded, ",")
|
|
return strings.TrimSpace(ips[0])
|
|
}
|
|
|
|
// Check X-Real-IP header
|
|
realIP := r.Header.Get("X-Real-IP")
|
|
if realIP != "" {
|
|
return realIP
|
|
}
|
|
|
|
// Fallback to RemoteAddr
|
|
ip := r.RemoteAddr
|
|
if strings.Contains(ip, ":") {
|
|
ip = strings.Split(ip, ":")[0]
|
|
}
|
|
return ip
|
|
}
|
|
|
|
func parseBrowser(userAgent string) (string, string) {
|
|
ua := strings.ToLower(userAgent)
|
|
|
|
browsers := map[string]string{
|
|
"edg": "Edge",
|
|
"chrome": "Chrome",
|
|
"firefox": "Firefox",
|
|
"safari": "Safari",
|
|
"opera": "Opera",
|
|
}
|
|
|
|
for key, name := range browsers {
|
|
if strings.Contains(ua, key) {
|
|
// Extract version
|
|
version := extractVersion(ua, key)
|
|
return name, version
|
|
}
|
|
}
|
|
|
|
return "Unknown", ""
|
|
}
|
|
|
|
// extractVersion pulls the numeric version that follows a browser token in a
// lower-cased User-Agent string, e.g. ("... chrome/120.0 ...", "chrome")
// yields "120.0". Returns "" when the token is absent or no digits follow.
func extractVersion(ua, browser string) string {
	pos := strings.Index(ua, browser)
	if pos == -1 {
		return ""
	}

	// Step past the token, then past any '/' or ' ' separators.
	start := pos + len(browser)
	for start < len(ua) && (ua[start] == '/' || ua[start] == ' ') {
		start++
	}

	// Consume a run of digits and dots.
	end := start
	for end < len(ua) {
		c := ua[end]
		if (c < '0' || c > '9') && c != '.' {
			break
		}
		end++
	}

	return ua[start:end]
}
|
|
|
|
// parseOS maps User-Agent keywords to a human-readable OS name. The checks
// run in order, so more specific tokens (e.g. "android", which also contains
// "linux" in its UA) are tested before generic ones.
func parseOS(userAgent string) string {
	ua := strings.ToLower(userAgent)

	switch {
	case strings.Contains(ua, "windows nt 10"):
		return "Windows 10"
	case strings.Contains(ua, "windows nt 11"):
		return "Windows 11"
	case strings.Contains(ua, "mac os x"):
		return "macOS"
	case strings.Contains(ua, "android"):
		return "Android"
	case strings.Contains(ua, "iphone"), strings.Contains(ua, "ipad"):
		return "iOS"
	case strings.Contains(ua, "linux"):
		return "Linux"
	default:
		return "Unknown"
	}
}
|
|
|
|
// parseDeviceType classifies a User-Agent as "Mobile", "Tablet", or
// "Desktop".
//
// Fix: tablets must be checked before phones. iPad UAs contain "mobile"
// (e.g. "... Mobile/15E148 Safari/604.1"), so the previous mobile-first
// ordering classified every iPad as "Mobile" and its Tablet branch was
// unreachable for iPads.
func parseDeviceType(userAgent string) string {
	ua := strings.ToLower(userAgent)

	switch {
	case strings.Contains(ua, "tablet"), strings.Contains(ua, "ipad"):
		return "Tablet"
	case strings.Contains(ua, "mobile"), strings.Contains(ua, "android"), strings.Contains(ua, "iphone"):
		return "Mobile"
	default:
		return "Desktop"
	}
}
|
|
|
|
func getOrCreateSession(r *http.Request) string {
|
|
// Try to get existing session from cookie
|
|
cookie, err := r.Cookie("session_id")
|
|
if err == nil && cookie.Value != "" {
|
|
return cookie.Value
|
|
}
|
|
|
|
// Generate new session ID
|
|
return uuid.New().String()
|
|
}
|
|
|
|
func getUserUUIDsFromCookie(r *http.Request) []string {
|
|
// Get user UUIDs from cookie
|
|
cookie, err := r.Cookie("user_uuids")
|
|
if err != nil || cookie.Value == "" {
|
|
return []string{}
|
|
}
|
|
|
|
// Parse comma-separated UUIDs
|
|
uuids := strings.Split(cookie.Value, ",")
|
|
// Filter out empty strings
|
|
var result []string
|
|
for _, uuid := range uuids {
|
|
if strings.TrimSpace(uuid) != "" {
|
|
result = append(result, strings.TrimSpace(uuid))
|
|
}
|
|
}
|
|
return result
|
|
}
|
|
|
|
func removeDuplicateUUIDs(uuids []string) []string {
|
|
seen := make(map[string]bool)
|
|
var result []string
|
|
|
|
for _, uuid := range uuids {
|
|
if !seen[uuid] {
|
|
seen[uuid] = true
|
|
result = append(result, uuid)
|
|
}
|
|
}
|
|
|
|
return result
|
|
}
|
|
|
|
func extractCookies(r *http.Request) string {
|
|
cookies := r.Cookies()
|
|
if len(cookies) == 0 {
|
|
return ""
|
|
}
|
|
|
|
cookieData := make(map[string]string)
|
|
for _, cookie := range cookies {
|
|
cookieData[cookie.Name] = cookie.Value
|
|
}
|
|
|
|
data, _ := json.Marshal(cookieData)
|
|
return string(data)
|
|
}
|
|
|
|
func generateXMLSitemap(pages []*models.Page) string {
|
|
var sb strings.Builder
|
|
|
|
sb.WriteString("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n")
|
|
|
|
for _, page := range pages {
|
|
sb.WriteString(" <url>\n")
|
|
sb.WriteString(fmt.Sprintf(" <loc>%s</loc>\n", xmlEscape(page.URL)))
|
|
sb.WriteString(fmt.Sprintf(" <lastmod>%s</lastmod>\n", page.LastModified.Format("2006-01-02")))
|
|
sb.WriteString(fmt.Sprintf(" <changefreq>%s</changefreq>\n", page.ChangeFreq))
|
|
sb.WriteString(fmt.Sprintf(" <priority>%.2f</priority>\n", page.Priority))
|
|
sb.WriteString(" </url>\n")
|
|
}
|
|
|
|
sb.WriteString("</urlset>")
|
|
|
|
return sb.String()
|
|
}
|
|
|
|
// xmlEscape replaces the five XML special characters in s with their entity
// references so the string can be embedded safely in element content or
// attribute values. '&' is handled first so freshly produced entities are
// not themselves re-escaped.
//
// Fix: the replacement strings had degenerated into identity mappings
// ("&" -> "&", "<" -> "<", ...), so the function escaped nothing and any URL
// containing '&' or '<' produced invalid sitemap XML. The proper entity
// references are restored.
func xmlEscape(s string) string {
	s = strings.ReplaceAll(s, "&", "&amp;")
	s = strings.ReplaceAll(s, "<", "&lt;")
	s = strings.ReplaceAll(s, ">", "&gt;")
	s = strings.ReplaceAll(s, "\"", "&quot;")
	s = strings.ReplaceAll(s, "'", "&apos;")
	return s
}
|