diff --git a/crawler/crawler.go b/crawler/crawler.go
index 88125cc..93c4153 100644
--- a/crawler/crawler.go
+++ b/crawler/crawler.go
@@ -2,6 +2,7 @@ package crawler
 
 import (
 	"fmt"
+	"math"
 	"net/http"
 	"net/url"
 	"sitemap-api/database"
@@ -14,17 +15,17 @@ import (
 )
 
 type Crawler struct {
-	db *database.DB
-	maxDepth int
-	visited map[string]bool
-	mu sync.Mutex
-	baseURL *url.URL
-	client *http.Client
-	eventChan chan models.Event
-	uuid string
-	siteID int
-	currentDepth int
-	totalPages int
+	db *database.DB
+	maxDepth int
+	visited map[string]bool
+	mu sync.Mutex
+	baseURL *url.URL
+	client *http.Client
+	eventChan chan models.Event
+	uuid string
+	siteID int
+	currentDepth int
+	totalPages int
 }
 
 func NewCrawler(db *database.DB) *Crawler {
@@ -166,16 +167,16 @@ func (c *Crawler) crawlURL(urlStr string, depth int) {
 
 	// Crawl found links concurrently (with limited concurrency)
 	var wg sync.WaitGroup
-	semaphore := make(chan struct{}, 5) // Limit to 5 concurrent requests
+	semaphore := make(chan struct{}, 10) // Limit to 10 concurrent requests
 
 	for _, link := range links {
 		if depth+1 <= c.maxDepth {
 			wg.Add(1)
 			go func(l string) {
 				defer wg.Done()
-				semaphore <- struct{}{} // Acquire
+				semaphore <- struct{}{} // Acquire
 				c.crawlURL(l, depth+1)
-				<-semaphore // Release
+				<-semaphore // Release
 			}(link)
 		}
 	}
@@ -268,12 +269,13 @@ func (c *Crawler) calculatePriority(depth int) float64 {
 	if depth == 0 {
 		return 1.0
 	}
-	// Decrease priority with depth
+	// Decrease priority with depth using clean decimals
 	priority := 1.0 - (float64(depth) * 0.2)
 	if priority < 0.3 {
 		priority = 0.3
 	}
-	return priority
+	// Round to 2 decimal places to avoid floating-point precision issues
+	return math.Round(priority*100) / 100
 }
 
 func (c *Crawler) sendEvent(eventType string, data interface{}) {