This commit is contained in:
Kar
2026-03-11 13:23:09 +05:30
parent 2027b81cfd
commit 45754802a6

94
main.go
View File

@@ -5,32 +5,31 @@ import (
"fmt" "fmt"
"log" "log"
"strings" "strings"
"time"
_ "github.com/go-sql-driver/mysql" _ "github.com/go-sql-driver/mysql"
"golang.org/x/net/publicsuffix" "golang.org/x/net/publicsuffix"
) )
func main() { func main() {
// Connect to database // Connect to database with retry logic
db, err := sql.Open("mysql", "sp:0000@tcp(l2:3306)/sp_spider") db, err := connectWithRetry("sp:0000@tcp(l2:3306)/sp_spider")
if err != nil { if err != nil {
log.Fatalf("Failed to connect to database: %v", err) log.Fatalf("Failed to connect to database: %v", err)
} }
defer db.Close() defer db.Close()
// Test connection // Process domains in batches to avoid connection timeouts
if err := db.Ping(); err != nil { for {
log.Fatalf("Failed to ping database: %v", err)
}
// Query only unprocessed domains from unique_domains table // Query only unprocessed domains from unique_domains table
rows, err := db.Query("SELECT id, domain FROM unique_domains WHERE valid IS NULL") rows, err := db.Query("SELECT id, domain FROM unique_domains WHERE valid IS NULL LIMIT 1000")
if err != nil { if err != nil {
log.Fatalf("Failed to query domains: %v", err) log.Printf("Failed to query domains, retrying: %v", err)
time.Sleep(5 * time.Second)
continue
} }
defer rows.Close()
// Process each domain one by one var processedCount int
for rows.Next() { for rows.Next() {
var id int var id int
var domain string var domain string
@@ -42,19 +41,76 @@ func main() {
// Process domain // Process domain
domainType := processDomain(domain) domainType := processDomain(domain)
// Update the domain record // Update the domain record with retry
updateQuery := "UPDATE unique_domains SET valid = ? WHERE id = ?" if err := updateDomainWithRetry(db, id, domain, domainType); err != nil {
_, err = db.Exec(updateQuery, domainType, id)
if err != nil {
log.Printf("Failed to update domain %s: %v", domain, err) log.Printf("Failed to update domain %s: %v", domain, err)
} else { continue
fmt.Printf("Updated ID %d: %s -> %s\n", id, domain, domainType) }
processedCount++
}
rows.Close()
if err := rows.Err(); err != nil {
log.Printf("Row iteration error: %v", err)
time.Sleep(5 * time.Second)
continue
}
if processedCount == 0 {
fmt.Println("No more unprocessed domains found.")
break
}
fmt.Printf("Processed %d domains in this batch.\n", processedCount)
time.Sleep(1 * time.Second) // Brief pause between batches
} }
} }
// connectWithRetry opens a MySQL handle for dsn and verifies it with Ping,
// making up to five attempts with a growing pause (1s, 2s, 3s, 4s) between
// failed pings.
//
// An error from sql.Open is returned immediately without retrying. On success
// the pool is limited to 10 open and 5 idle connections with a five-minute
// connection lifetime. If every ping fails, the handle from each attempt has
// been closed and the returned error wraps the last ping error.
func connectWithRetry(dsn string) (*sql.DB, error) {
	const maxAttempts = 5

	var lastErr error
	for attempt := 1; attempt <= maxAttempts; attempt++ {
		db, err := sql.Open("mysql", dsn)
		if err != nil {
			return nil, err
		}

		if err = db.Ping(); err == nil {
			// Configure connection pool
			db.SetMaxOpenConns(10)
			db.SetMaxIdleConns(5)
			db.SetConnMaxLifetime(5 * time.Minute)
			return db, nil
		}

		lastErr = err
		log.Printf("Connection attempt %d failed: %v", attempt, err)

		// Every sql.Open creates a fresh pool handle; close the one that
		// failed so retries do not pile up abandoned handles.
		if cerr := db.Close(); cerr != nil {
			log.Printf("Closing failed connection handle: %v", cerr)
		}

		// No pause after the final attempt: there is nothing left to wait for.
		if attempt < maxAttempts {
			time.Sleep(time.Duration(attempt) * time.Second)
		}
	}
	return nil, fmt.Errorf("database unreachable after %d attempts: %w", maxAttempts, lastErr)
}
// updateDomainWithRetry sets the valid column of the unique_domains row
// identified by id to domainType, making up to three attempts.
//
// A failed attempt is logged and followed by a pause of 1s, then 2s, before
// the next try; there is no pause after the last attempt. On success the
// update is echoed to stdout and nil is returned. domain is used only in the
// log and progress output. If all attempts fail, the returned error wraps the
// last db.Exec error so callers can see and inspect the cause.
func updateDomainWithRetry(db *sql.DB, id int, domain, domainType string) error {
	const (
		maxAttempts = 3
		updateQuery = "UPDATE unique_domains SET valid = ? WHERE id = ?"
	)

	var lastErr error
	for attempt := 1; attempt <= maxAttempts; attempt++ {
		_, err := db.Exec(updateQuery, domainType, id)
		if err == nil {
			fmt.Printf("Updated ID %d: %s -> %s\n", id, domain, domainType)
			return nil
		}

		lastErr = err
		log.Printf("Update attempt %d failed for %s: %v", attempt, domain, err)

		if attempt < maxAttempts {
			time.Sleep(time.Duration(attempt) * time.Second)
		}
	}
	return fmt.Errorf("failed after %d attempts: %w", maxAttempts, lastErr)
}
func processDomain(domain string) string { func processDomain(domain string) string {