Increase Concurrency for Faster Crawling

This commit is contained in:
Kar
2026-02-05 19:57:37 +05:30
parent b80e988191
commit 228cf2f05c

View File

@@ -2,6 +2,7 @@ package crawler
 import (
 	"fmt"
+	"math"
 	"net/http"
 	"net/url"
 	"sitemap-api/database"
@@ -166,7 +167,7 @@ func (c *Crawler) crawlURL(urlStr string, depth int) {
 	// Crawl found links concurrently (with limited concurrency)
 	var wg sync.WaitGroup
-	semaphore := make(chan struct{}, 5) // Limit to 5 concurrent requests
+	semaphore := make(chan struct{}, 10) // Limit to 10 concurrent requests

 	for _, link := range links {
 		if depth+1 <= c.maxDepth {
@@ -268,12 +269,13 @@ func (c *Crawler) calculatePriority(depth int) float64 {
 	if depth == 0 {
 		return 1.0
 	}
-	// Decrease priority with depth
+	// Decrease priority with depth using clean decimals
 	priority := 1.0 - (float64(depth) * 0.2)
 	if priority < 0.3 {
 		priority = 0.3
 	}
-	return priority
+	// Round to 2 decimal places to avoid floating-point precision issues
+	return math.Round(priority*100) / 100
 }

 func (c *Crawler) sendEvent(eventType string, data interface{}) {