Increase Concurrency for Faster Crawling
This commit is contained in:
@@ -2,6 +2,7 @@ package crawler
|
|||||||
|
|
||||||
import (
|
import (
|
||||||
"fmt"
|
"fmt"
|
||||||
|
"math"
|
||||||
"net/http"
|
"net/http"
|
||||||
"net/url"
|
"net/url"
|
||||||
"sitemap-api/database"
|
"sitemap-api/database"
|
||||||
@@ -166,7 +167,7 @@ func (c *Crawler) crawlURL(urlStr string, depth int) {
|
|||||||
|
|
||||||
// Crawl found links concurrently (with limited concurrency)
|
// Crawl found links concurrently (with limited concurrency)
|
||||||
var wg sync.WaitGroup
|
var wg sync.WaitGroup
|
||||||
semaphore := make(chan struct{}, 5) // Limit to 5 concurrent requests
|
semaphore := make(chan struct{}, 10) // Limit to 10 concurrent requests
|
||||||
|
|
||||||
for _, link := range links {
|
for _, link := range links {
|
||||||
if depth+1 <= c.maxDepth {
|
if depth+1 <= c.maxDepth {
|
||||||
@@ -268,12 +269,13 @@ func (c *Crawler) calculatePriority(depth int) float64 {
|
|||||||
if depth == 0 {
|
if depth == 0 {
|
||||||
return 1.0
|
return 1.0
|
||||||
}
|
}
|
||||||
// Decrease priority with depth
|
// Decrease priority with depth using clean decimals
|
||||||
priority := 1.0 - (float64(depth) * 0.2)
|
priority := 1.0 - (float64(depth) * 0.2)
|
||||||
if priority < 0.3 {
|
if priority < 0.3 {
|
||||||
priority = 0.3
|
priority = 0.3
|
||||||
}
|
}
|
||||||
return priority
|
// Round to 2 decimal places to avoid floating-point precision issues
|
||||||
|
return math.Round(priority*100) / 100
|
||||||
}
|
}
|
||||||
|
|
||||||
func (c *Crawler) sendEvent(eventType string, data interface{}) {
|
func (c *Crawler) sendEvent(eventType string, data interface{}) {
|
||||||
|
|||||||
Reference in New Issue
Block a user