init

handlers/handler.go (new file, 465 lines)

@@ -0,0 +1,465 @@
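// Package handlers implements the HTTP layer of the sitemap API: it accepts
// sitemap-generation requests, streams crawl progress to clients over
// Server-Sent Events (SSE), and serves the finished sitemap XML for download.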
package handlers

import (
	"encoding/json"
	"encoding/xml"
	"fmt"
	"net"
	"net/http"
	"net/url"
	"sitemap-api/crawler"
	"sitemap-api/database"
	"sitemap-api/models"
	"strconv"
	"strings"
	"sync"
	"time"

	"github.com/go-chi/chi/v5"
	"github.com/google/uuid"
)

// Handler bundles the dependencies shared by every HTTP endpoint.
type Handler struct {
	db            *database.DB
	crawler       *crawler.Crawler
	streamManager *StreamManager
}

// NewHandler constructs a Handler, wiring a new crawler to the given database.
func NewHandler(db *database.DB, streamManager *StreamManager) *Handler {
	return &Handler{
		db:            db,
		crawler:       crawler.NewCrawler(db),
		streamManager: streamManager,
	}
}

// StreamManager handles multiple concurrent SSE connections, keyed by crawl
// UUID, with one buffered event channel per stream.
type StreamManager struct {
	mu      sync.RWMutex
	streams map[string]chan models.Event
}

// NewStreamManager returns an empty, ready-to-use StreamManager.
func NewStreamManager() *StreamManager {
	return &StreamManager{
		streams: make(map[string]chan models.Event),
	}
}

// CreateStream registers a buffered event channel for the given UUID and
// returns it for the producer to write to.
func (sm *StreamManager) CreateStream(uuid string) chan models.Event {
	sm.mu.Lock()
	defer sm.mu.Unlock()

	ch := make(chan models.Event, 100)
	sm.streams[uuid] = ch
	return ch
}

// GetStream returns the event channel for the given UUID, if one exists.
func (sm *StreamManager) GetStream(uuid string) (chan models.Event, bool) {
	sm.mu.RLock()
	defer sm.mu.RUnlock()

	ch, exists := sm.streams[uuid]
	return ch, exists
}

// CloseStream closes and removes the channel for the given UUID, which ends
// any range loop currently consuming it.
func (sm *StreamManager) CloseStream(uuid string) {
	sm.mu.Lock()
	defer sm.mu.Unlock()

	if ch, exists := sm.streams[uuid]; exists {
		close(ch)
		delete(sm.streams, uuid)
	}
}
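
// A minimal sketch of the intended lifecycle (illustrative; the event type
// and payload below are hypothetical):
//
//	ch := sm.CreateStream(id)
//	go func() {
//		ch <- models.Event{Type: "page_found", Data: payload}
//		sm.CloseStream(id)
//	}()
//
// The SSE handler then drains ch with a range loop until CloseStream
// closes it.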

// GenerateSitemapXML handles POST /generate-sitemap-xml.
func (h *Handler) GenerateSitemapXML(w http.ResponseWriter, r *http.Request) {
	var req struct {
		URL      string `json:"url"`
		MaxDepth int    `json:"max_depth"`
	}

	if err := json.NewDecoder(r.Body).Decode(&req); err != nil {
		http.Error(w, "Invalid request body", http.StatusBadRequest)
		return
	}

	// Validate the URL.
	if req.URL == "" {
		http.Error(w, "URL is required", http.StatusBadRequest)
		return
	}

	parsedURL, err := url.Parse(req.URL)
	if err != nil || parsedURL.Scheme == "" || parsedURL.Host == "" {
		http.Error(w, "Invalid URL format", http.StatusBadRequest)
		return
	}

	// Fall back to a depth of 3 when the requested depth is missing or
	// outside the supported range of 1-5.
	if req.MaxDepth <= 0 || req.MaxDepth > 5 {
		req.MaxDepth = 3
	}

	// Generate the UUID server-side.
	generatedUUID := uuid.New().String()

	// Extract client metadata.
	ip := getClientIP(r)
	userAgent := r.Header.Get("User-Agent")
	browser, browserVersion := parseBrowser(userAgent)
	os := parseOS(userAgent)
	deviceType := parseDeviceType(userAgent)
	sessionID := getOrCreateSession(r)
	cookies := extractCookies(r)
	referrer := r.Header.Get("Referer")

	// Extract the domain from the URL.
	domain := parsedURL.Host

	// Create the site record.
	site := &models.Site{
		UUID:           generatedUUID,
		Domain:         domain,
		URL:            req.URL,
		MaxDepth:       req.MaxDepth,
		Status:         "processing",
		IPAddress:      ip,
		UserAgent:      userAgent,
		Browser:        browser,
		BrowserVersion: browserVersion,
		OS:             os,
		DeviceType:     deviceType,
		SessionID:      sessionID,
		Cookies:        cookies,
		Referrer:       referrer,
	}

	siteID, err := h.db.CreateSite(site)
	if err != nil {
		http.Error(w, fmt.Sprintf("Failed to create site: %v", err), http.StatusInternalServerError)
		return
	}

	// Create the SSE stream for this UUID.
	eventChan := h.streamManager.CreateStream(generatedUUID)

	// Start crawling in the background (non-blocking).
	go func() {
		h.crawler.Crawl(generatedUUID, req.URL, req.MaxDepth, eventChan)
		// Give subscribers time to drain the final events before closing.
		time.Sleep(2 * time.Second)
		h.streamManager.CloseStream(generatedUUID)
	}()

	// Return immediately with the UUID.
	response := map[string]interface{}{
		"uuid":       generatedUUID,
		"site_id":    siteID,
		"status":     "processing",
		"stream_url": "/stream/" + generatedUUID,
		"message":    "Sitemap generation started",
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(response)
}
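
// A hypothetical round trip (host and values illustrative):
//
//	curl -X POST http://localhost:8080/generate-sitemap-xml \
//	  -H 'Content-Type: application/json' \
//	  -d '{"url": "https://example.com", "max_depth": 2}'
//
// responds immediately, before the crawl finishes, with something like:
//
//	{"uuid":"<uuid>","site_id":1,"status":"processing",
//	 "stream_url":"/stream/<uuid>","message":"Sitemap generation started"}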

// StreamSSE handles GET /stream/{uuid}.
func (h *Handler) StreamSSE(w http.ResponseWriter, r *http.Request) {
	uuid := chi.URLParam(r, "uuid")

	// Get the event channel for this UUID.
	eventChan, exists := h.streamManager.GetStream(uuid)
	if !exists {
		http.Error(w, "Stream not found", http.StatusNotFound)
		return
	}

	// Set the SSE headers.
	w.Header().Set("Content-Type", "text/event-stream")
	w.Header().Set("Cache-Control", "no-cache")
	w.Header().Set("Connection", "keep-alive")
	w.Header().Set("Access-Control-Allow-Origin", "*")

	flusher, ok := w.(http.Flusher)
	if !ok {
		http.Error(w, "Streaming unsupported", http.StatusInternalServerError)
		return
	}

	// Send the connected event.
	connectedData := map[string]string{
		"uuid":    uuid,
		"message": "Connected to stream",
	}
	connectedJSON, _ := json.Marshal(connectedData)
	fmt.Fprintf(w, "event: connected\ndata: %s\n\n", connectedJSON)
	flusher.Flush()

	// Stream events until the channel is closed.
	for event := range eventChan {
		data, err := json.Marshal(event.Data)
		if err != nil {
			continue
		}
		fmt.Fprintf(w, "event: %s\ndata: %s\n\n", event.Type, data)
		flusher.Flush()
	}
}
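
// Each event is written as one SSE frame, exactly as the Fprintf calls above
// produce it:
//
//	event: connected
//	data: {"uuid":"<uuid>","message":"Connected to stream"}
//
// A client can follow a stream with any HTTP tool that disables buffering
// (hypothetical host):
//
//	curl -N http://localhost:8080/stream/<uuid>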

// DownloadSitemap handles GET /download/{uuid}.
func (h *Handler) DownloadSitemap(w http.ResponseWriter, r *http.Request) {
	uuidParam := chi.URLParam(r, "uuid")

	// Get the site by UUID.
	site, err := h.db.GetSiteByUUID(uuidParam)
	if err != nil {
		http.Error(w, "Sitemap not found", http.StatusNotFound)
		return
	}

	// Get all pages for this site.
	pages, err := h.db.GetPagesBySiteID(site.ID)
	if err != nil {
		http.Error(w, "Failed to retrieve pages", http.StatusInternalServerError)
		return
	}

	// Generate the XML sitemap.
	sitemap := generateXMLSitemap(pages)

	// Set the download headers.
	filename := fmt.Sprintf("sitemap-%s.xml", strings.ReplaceAll(site.Domain, ".", "-"))
	w.Header().Set("Content-Type", "application/xml; charset=utf-8")
	w.Header().Set("Content-Disposition", fmt.Sprintf("attachment; filename=\"%s\"", filename))
	w.Header().Set("X-Generated-At", time.Now().Format(time.RFC3339))

	// Write the XML declaration followed by the sitemap body.
	w.Write([]byte(xml.Header))
	w.Write([]byte(sitemap))
}
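
// Assuming the same hypothetical host, the finished sitemap can be fetched
// with curl, where -J adopts the filename from the Content-Disposition
// header set above:
//
//	curl -OJ http://localhost:8080/download/<uuid>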

// GetSites handles GET /sites.
func (h *Handler) GetSites(w http.ResponseWriter, r *http.Request) {
	sites, err := h.db.GetAllSites()
	if err != nil {
		http.Error(w, "Failed to retrieve sites", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(sites)
}

// GetSite handles GET /sites/{id}.
func (h *Handler) GetSite(w http.ResponseWriter, r *http.Request) {
	idParam := chi.URLParam(r, "id")
	id, err := strconv.Atoi(idParam)
	if err != nil {
		http.Error(w, "Invalid site ID", http.StatusBadRequest)
		return
	}

	site, err := h.db.GetSiteByID(id)
	if err != nil {
		http.Error(w, "Site not found", http.StatusNotFound)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(site)
}

// DeleteSite handles DELETE /sites/{id}.
func (h *Handler) DeleteSite(w http.ResponseWriter, r *http.Request) {
	idParam := chi.URLParam(r, "id")
	id, err := strconv.Atoi(idParam)
	if err != nil {
		http.Error(w, "Invalid site ID", http.StatusBadRequest)
		return
	}

	if err := h.db.DeleteSite(id); err != nil {
		http.Error(w, "Failed to delete site", http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]interface{}{
		"success": true,
		"message": "Site deleted successfully",
	})
}

// Health handles GET /health.
func (h *Handler) Health(w http.ResponseWriter, r *http.Request) {
	w.Header().Set("Content-Type", "application/json")
	json.NewEncoder(w).Encode(map[string]string{
		"status": "healthy",
		"time":   time.Now().Format(time.RFC3339),
	})
}

// Helper functions

// getClientIP extracts the originating client IP, preferring proxy headers
// over the raw connection address.
func getClientIP(r *http.Request) string {
	// Check the X-Forwarded-For header first.
	forwarded := r.Header.Get("X-Forwarded-For")
	if forwarded != "" {
		// Take the first IP if the header lists several hops.
		ips := strings.Split(forwarded, ",")
		return strings.TrimSpace(ips[0])
	}

	// Check the X-Real-IP header.
	realIP := r.Header.Get("X-Real-IP")
	if realIP != "" {
		return realIP
	}

	// Fall back to RemoteAddr. net.SplitHostPort handles IPv6 literals such
	// as "[::1]:8080", which a naive split on ":" would mangle.
	if host, _, err := net.SplitHostPort(r.RemoteAddr); err == nil {
		return host
	}
	return r.RemoteAddr
}
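
// For example, given "X-Forwarded-For: 203.0.113.7, 10.0.0.1" (documentation
// addresses), getClientIP returns "203.0.113.7", the left-most hop.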

// parseBrowser returns a browser name and version guessed from the user
// agent. The checks are ordered because the tokens overlap: Chrome user
// agents also contain "safari", and Edge user agents contain both "chrome"
// and "safari", so iterating a map here would give nondeterministic, often
// wrong results.
func parseBrowser(userAgent string) (string, string) {
	ua := strings.ToLower(userAgent)

	browsers := []struct {
		key  string
		name string
	}{
		{"edg", "Edge"},
		{"opera", "Opera"},
		{"chrome", "Chrome"},
		{"firefox", "Firefox"},
		{"safari", "Safari"},
	}

	for _, b := range browsers {
		if strings.Contains(ua, b.key) {
			// Extract the version that follows the token.
			version := extractVersion(ua, b.key)
			return b.name, version
		}
	}

	return "Unknown", ""
}
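
// As an illustration, a desktop Chrome user agent such as
// "Mozilla/5.0 (Windows NT 10.0; Win64; x64) ... Chrome/120.0.0.0 Safari/537.36"
// contains both "chrome" and "safari"; the ordered check returns
// ("Chrome", "120.0.0.0") rather than misreporting Safari.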

func extractVersion(ua, browser string) string {
	idx := strings.Index(ua, browser)
	if idx == -1 {
		return ""
	}

	versionStart := idx + len(browser)
	if versionStart >= len(ua) {
		return ""
	}

	// Skip forward to the version number.
	for versionStart < len(ua) && (ua[versionStart] == '/' || ua[versionStart] == ' ') {
		versionStart++
	}

	versionEnd := versionStart
	for versionEnd < len(ua) && ((ua[versionEnd] >= '0' && ua[versionEnd] <= '9') || ua[versionEnd] == '.') {
		versionEnd++
	}

	return ua[versionStart:versionEnd]
}

func parseOS(userAgent string) string {
	ua := strings.ToLower(userAgent)

	oses := []struct {
		keyword string
		name    string
	}{
		{"windows nt 10", "Windows 10"},
		// Note: Windows 11 browsers still report "Windows NT 10.0", so this
		// entry is best-effort and will rarely match in practice.
		{"windows nt 11", "Windows 11"},
		{"mac os x", "macOS"},
		{"android", "Android"},
		{"iphone", "iOS"},
		{"ipad", "iOS"},
		{"linux", "Linux"},
	}

	for _, os := range oses {
		if strings.Contains(ua, os.keyword) {
			return os.name
		}
	}

	return "Unknown"
}

func parseDeviceType(userAgent string) string {
	ua := strings.ToLower(userAgent)

	// Android phones report "mobile" in the user agent; Android tablets
	// usually omit it, so treat android-without-mobile as a tablet.
	if strings.Contains(ua, "tablet") || strings.Contains(ua, "ipad") ||
		(strings.Contains(ua, "android") && !strings.Contains(ua, "mobile")) {
		return "Tablet"
	}

	if strings.Contains(ua, "mobile") || strings.Contains(ua, "android") || strings.Contains(ua, "iphone") {
		return "Mobile"
	}

	return "Desktop"
}

func getOrCreateSession(r *http.Request) string {
	// Try to reuse an existing session ID from the cookie.
	cookie, err := r.Cookie("session_id")
	if err == nil && cookie.Value != "" {
		return cookie.Value
	}

	// Otherwise generate a new session ID. Note that it is not written back
	// as a Set-Cookie header here; it persists only if the caller stores it.
	return uuid.New().String()
}

func extractCookies(r *http.Request) string {
	cookies := r.Cookies()
	if len(cookies) == 0 {
		return ""
	}

	cookieData := make(map[string]string)
	for _, cookie := range cookies {
		cookieData[cookie.Name] = cookie.Value
	}

	data, _ := json.Marshal(cookieData)
	return string(data)
}

func generateXMLSitemap(pages []*models.Page) string {
	var sb strings.Builder

	sb.WriteString("<urlset xmlns=\"http://www.sitemaps.org/schemas/sitemap/0.9\">\n")

	for _, page := range pages {
		sb.WriteString("  <url>\n")
		sb.WriteString(fmt.Sprintf("    <loc>%s</loc>\n", xmlEscape(page.URL)))
		sb.WriteString(fmt.Sprintf("    <lastmod>%s</lastmod>\n", page.LastModified.Format("2006-01-02")))
		sb.WriteString(fmt.Sprintf("    <changefreq>%s</changefreq>\n", page.ChangeFreq))
		sb.WriteString(fmt.Sprintf("    <priority>%.1f</priority>\n", page.Priority))
		sb.WriteString("  </url>\n")
	}

	sb.WriteString("</urlset>")

	return sb.String()
}
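
// Together with xml.Header, the download endpoint emits a document shaped
// like this (values illustrative):
//
//	<?xml version="1.0" encoding="UTF-8"?>
//	<urlset xmlns="http://www.sitemaps.org/schemas/sitemap/0.9">
//	  <url>
//	    <loc>https://example.com/</loc>
//	    <lastmod>2024-01-02</lastmod>
//	    <changefreq>daily</changefreq>
//	    <priority>0.8</priority>
//	  </url>
//	</urlset>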

// xmlEscape replaces the five characters that are unsafe in XML text.
// "&" must be escaped first so the entities produced below are not
// escaped a second time.
func xmlEscape(s string) string {
	s = strings.ReplaceAll(s, "&", "&amp;")
	s = strings.ReplaceAll(s, "<", "&lt;")
	s = strings.ReplaceAll(s, ">", "&gt;")
	s = strings.ReplaceAll(s, "\"", "&quot;")
	s = strings.ReplaceAll(s, "'", "&apos;")
	return s
}
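
// An alternative to hand-rolled escaping, if writing through an io.Writer is
// acceptable, is the standard library's xml.EscapeText (a sketch):
//
//	var buf bytes.Buffer // requires importing "bytes"
//	_ = xml.EscapeText(&buf, []byte(rawURL))
//	escaped := buf.String()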