From 45754802a6d54afe7bf17381e01105d156f747a7 Mon Sep 17 00:00:00 2001 From: "Kar@k5" Date: Wed, 11 Mar 2026 13:23:09 +0530 Subject: [PATCH] batch --- main.go | 118 +++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 87 insertions(+), 31 deletions(-) diff --git a/main.go b/main.go index 70c1e22..d84776c 100644 --- a/main.go +++ b/main.go @@ -5,56 +5,112 @@ import ( "fmt" "log" "strings" + "time" _ "github.com/go-sql-driver/mysql" "golang.org/x/net/publicsuffix" ) func main() { - // Connect to database - db, err := sql.Open("mysql", "sp:0000@tcp(l2:3306)/sp_spider") + // Connect to database with retry logic + db, err := connectWithRetry("sp:0000@tcp(l2:3306)/sp_spider") if err != nil { log.Fatalf("Failed to connect to database: %v", err) } defer db.Close() - // Test connection - if err := db.Ping(); err != nil { - log.Fatalf("Failed to ping database: %v", err) - } - - // Query only unprocessed domains from unique_domains table - rows, err := db.Query("SELECT id, domain FROM unique_domains WHERE valid IS NULL") - if err != nil { - log.Fatalf("Failed to query domains: %v", err) - } - defer rows.Close() - - // Process each domain one by one - for rows.Next() { - var id int - var domain string - if err := rows.Scan(&id, &domain); err != nil { - log.Printf("Failed to scan domain: %v", err) + // Process domains in batches to avoid connection timeouts + for { + // Query only unprocessed domains from unique_domains table + rows, err := db.Query("SELECT id, domain FROM unique_domains WHERE valid IS NULL LIMIT 1000") + if err != nil { + log.Printf("Failed to query domains, retrying: %v", err) + time.Sleep(5 * time.Second) continue } - // Process domain - domainType := processDomain(domain) + var processedCount int + for rows.Next() { + var id int + var domain string + if err := rows.Scan(&id, &domain); err != nil { + log.Printf("Failed to scan domain: %v", err) + continue + } - // Update the domain record - updateQuery := "UPDATE unique_domains SET valid = ? WHERE id = ?" - _, err = db.Exec(updateQuery, domainType, id) + // Process domain + domainType := processDomain(domain) + + // Update the domain record with retry + if err := updateDomainWithRetry(db, id, domain, domainType); err != nil { + log.Printf("Failed to update domain %s: %v", domain, err) + continue + } + processedCount++ + } + rows.Close() + + if err := rows.Err(); err != nil { + log.Printf("Row iteration error: %v", err) + time.Sleep(5 * time.Second) + continue + } + + if processedCount == 0 { + fmt.Println("No more unprocessed domains found.") + break + } + + fmt.Printf("Processed %d domains in this batch.\n", processedCount) + time.Sleep(1 * time.Second) // Brief pause between batches + } +} + +func connectWithRetry(dsn string) (*sql.DB, error) { + var db *sql.DB + var err error + + for i := 0; i < 5; i++ { + db, err = sql.Open("mysql", dsn) if err != nil { - log.Printf("Failed to update domain %s: %v", domain, err) - } else { + return nil, err + } + + if err = db.Ping(); err != nil { + log.Printf("Connection attempt %d failed: %v", i+1, err) + if i < 4 { + time.Sleep(time.Duration(i+1) * time.Second) + continue + } + return nil, err + } + + // Configure connection pool + db.SetMaxOpenConns(10) + db.SetMaxIdleConns(5) + db.SetConnMaxLifetime(5 * time.Minute) + + return db, nil + } + + return nil, err +} + +func updateDomainWithRetry(db *sql.DB, id int, domain, domainType string) error { + for i := 0; i < 3; i++ { + updateQuery := "UPDATE unique_domains SET valid = ? WHERE id = ?" + _, err := db.Exec(updateQuery, domainType, id) + if err == nil { fmt.Printf("Updated ID %d: %s -> %s\n", id, domain, domainType) + return nil + } + + log.Printf("Update attempt %d failed for %s: %v", i+1, domain, err) + if i < 2 { + time.Sleep(time.Duration(i+1) * time.Second) } } - - if err = rows.Err(); err != nil { - log.Fatalf("Row iteration error: %v", err) - } + return fmt.Errorf("failed after 3 attempts") } func processDomain(domain string) string {