Increase Concurrency for Faster Crawling
This commit is contained in:
@@ -2,6 +2,7 @@ package crawler
 
 import (
 	"fmt"
+	"math"
 	"net/http"
 	"net/url"
 	"sitemap-api/database"
@@ -166,7 +167,7 @@ func (c *Crawler) crawlURL(urlStr string, depth int) {
 
 	// Crawl found links concurrently (with limited concurrency)
 	var wg sync.WaitGroup
-	semaphore := make(chan struct{}, 5) // Limit to 5 concurrent requests
+	semaphore := make(chan struct{}, 10) // Limit to 10 concurrent requests
 
 	for _, link := range links {
 		if depth+1 <= c.maxDepth {
@@ -268,12 +269,13 @@ func (c *Crawler) calculatePriority(depth int) float64 {
 	if depth == 0 {
 		return 1.0
 	}
-	// Decrease priority with depth
+	// Decrease priority with depth using clean decimals
 	priority := 1.0 - (float64(depth) * 0.2)
 	if priority < 0.3 {
 		priority = 0.3
 	}
-	return priority
+	// Round to 2 decimal places to avoid floating-point precision issues
+	return math.Round(priority*100) / 100
 }
 
 func (c *Crawler) sendEvent(eventType string, data interface{}) {
Reference in New Issue
Block a user