// Command main classifies every row of the unique_domains table whose
// "valid" column is still NULL. Each domain is compared with its registrable
// domain (eTLD+1) and the column is set to "valid", "sub" or "invalid".
package main

import (
	"database/sql"
	"fmt"
	"log"
	"strings"
	"time"

	_ "github.com/go-sql-driver/mysql"
	"golang.org/x/net/publicsuffix"
)

const (
	// connectAttempts is how many times the initial ping is tried.
	connectAttempts = 5
	// updateAttempts is how many times a single UPDATE is tried.
	updateAttempts = 3
	// retryPause is the wait after a failed batch query or row iteration.
	retryPause = 5 * time.Second
	// batchPause is the wait between two successful batches.
	batchPause = 1 * time.Second
)

// main connects to the database and processes unclassified domains in
// batches of 1000 until a batch yields no progress.
func main() {
	db, err := connectWithRetry("sp:0000@tcp(l2:3306)/sp_spider")
	if err != nil {
		log.Fatalf("Failed to connect to database: %v", err)
	}
	defer db.Close()

	// Work in bounded batches so that no single result set stays open for long.
	for {
		rows, err := db.Query("SELECT id, domain FROM unique_domains WHERE valid IS NULL LIMIT 1000")
		if err != nil {
			log.Printf("Failed to query domains, retrying: %v", err)
			time.Sleep(retryPause)
			continue
		}

		processed, failed := classifyRows(db, rows)
		rows.Close()

		if err := rows.Err(); err != nil {
			log.Printf("Row iteration error: %v", err)
			time.Sleep(retryPause)
			continue
		}

		switch {
		case processed == 0 && failed == 0:
			fmt.Println("No more unprocessed domains found.")
			return
		case processed == 0:
			// Rows were fetched but none could be handled. Claiming that the
			// table is finished would be wrong, and looping again would just
			// select the same failing rows, so stop with an accurate message.
			log.Printf("Stopping: all %d domains in this batch failed to process.", failed)
			return
		}

		fmt.Printf("Processed %d domains in this batch.\n", processed)
		time.Sleep(batchPause)
	}
}

// classifyRows walks an open result set of (id, domain) pairs, classifies each
// domain and writes the result back through db. Per-row failures are logged
// and counted rather than aborting the batch.
//
// It returns the number of rows updated successfully and the number of rows
// that could not be scanned or updated. The caller remains responsible for
// closing rows and checking rows.Err.
func classifyRows(db *sql.DB, rows *sql.Rows) (processed, failed int) {
	for rows.Next() {
		var (
			id     int
			domain string
		)
		if err := rows.Scan(&id, &domain); err != nil {
			log.Printf("Failed to scan domain: %v", err)
			failed++
			continue
		}

		domainType := processDomain(domain)
		if err := updateDomainWithRetry(db, id, domain, domainType); err != nil {
			log.Printf("Failed to update domain %s: %v", domain, err)
			failed++
			continue
		}
		processed++
	}
	return processed, failed
}

// connectWithRetry opens a MySQL handle for dsn and pings it up to
// connectAttempts times, sleeping 1s, 2s, ... between attempts.
//
// On success it returns the handle with its connection pool configured. On
// failure it returns a nil handle and the error from sql.Open or from the
// last ping.
func connectWithRetry(dsn string) (*sql.DB, error) {
	// Open the handle once: reopening on every attempt would leak the
	// previous *sql.DB, since only Ping needs to be retried.
	db, err := sql.Open("mysql", dsn)
	if err != nil {
		return nil, err
	}

	db.SetMaxOpenConns(10)
	db.SetMaxIdleConns(5)
	db.SetConnMaxLifetime(5 * time.Minute)

	for attempt := 1; attempt <= connectAttempts; attempt++ {
		if err = db.Ping(); err == nil {
			return db, nil
		}
		log.Printf("Connection attempt %d failed: %v", attempt, err)
		if attempt < connectAttempts {
			time.Sleep(time.Duration(attempt) * time.Second)
		}
	}

	// Best-effort cleanup; the ping error is the one worth reporting.
	_ = db.Close()
	return nil, err
}

// updateDomainWithRetry stores domainType in the "valid" column of the row
// identified by id, trying up to updateAttempts times with a 1s, 2s, ...
// pause between attempts. domain is used only for log output.
//
// It returns nil on success, otherwise an error wrapping the last failure.
func updateDomainWithRetry(db *sql.DB, id int, domain, domainType string) error {
	const updateQuery = "UPDATE unique_domains SET valid = ? WHERE id = ?"

	var lastErr error
	for attempt := 1; attempt <= updateAttempts; attempt++ {
		_, lastErr = db.Exec(updateQuery, domainType, id)
		if lastErr == nil {
			fmt.Printf("Updated ID %d: %s -> %s\n", id, domain, domainType)
			return nil
		}
		log.Printf("Update attempt %d failed for %s: %v", attempt, domain, lastErr)
		if attempt < updateAttempts {
			time.Sleep(time.Duration(attempt) * time.Second)
		}
	}
	// Keep the cause in the chain so callers can inspect it.
	return fmt.Errorf("failed after %d attempts: %w", updateAttempts, lastErr)
}

// processDomain classifies domain relative to its registrable domain (eTLD+1).
//
// It returns "invalid" when the trimmed input is empty or has no eTLD+1,
// "valid" when the input is exactly its eTLD+1, and "sub" otherwise.
func processDomain(domain string) string {
	// Host names are case-insensitive, but the public-suffix lookup compares
	// labels against lowercase entries; without normalising, a name such as
	// "Example.CO.UK" would miss the "co.uk" rule and be reported as "sub".
	domain = strings.ToLower(strings.TrimSpace(domain))
	if domain == "" {
		return "invalid"
	}

	etld1, err := publicsuffix.EffectiveTLDPlusOne(domain)
	if err != nil {
		return "invalid"
	}

	if domain == etld1 {
		return "valid"
	}
	return "sub"
}