Files
sitemap-generator-xml-golang/database/db.go
2026-02-05 23:17:59 +05:30

300 lines
7.6 KiB
Go

package database
import (
"database/sql"
"fmt"
"sitemap-api/models"
"strings"
"time"
_ "github.com/mattn/go-sqlite3"
)
type DB struct {
conn *sql.DB
}
func NewDB(dbPath string) (*DB, error) {
conn, err := sql.Open("sqlite3", dbPath)
if err != nil {
return nil, err
}
db := &DB{conn: conn}
if err := db.createTables(); err != nil {
return nil, err
}
return db, nil
}
func (db *DB) Close() error {
return db.conn.Close()
}
func (db *DB) createTables() error {
schema := `
CREATE TABLE IF NOT EXISTS sites (
id INTEGER PRIMARY KEY AUTOINCREMENT,
uuid TEXT UNIQUE NOT NULL,
domain TEXT NOT NULL,
url TEXT NOT NULL,
max_depth INTEGER DEFAULT 3,
page_count INTEGER DEFAULT 0,
status TEXT DEFAULT 'processing',
ip_address TEXT,
user_agent TEXT,
browser TEXT,
browser_version TEXT,
os TEXT,
device_type TEXT,
session_id TEXT,
cookies TEXT,
referrer TEXT,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
completed_at DATETIME,
last_crawled DATETIME
);
CREATE TABLE IF NOT EXISTS pages (
id INTEGER PRIMARY KEY AUTOINCREMENT,
site_id INTEGER NOT NULL,
url TEXT NOT NULL UNIQUE,
depth INTEGER DEFAULT 0,
last_modified DATETIME DEFAULT CURRENT_TIMESTAMP,
priority REAL DEFAULT 0.5,
change_freq TEXT DEFAULT 'monthly',
FOREIGN KEY (site_id) REFERENCES sites(id) ON DELETE CASCADE
);
CREATE TABLE IF NOT EXISTS sessions (
id INTEGER PRIMARY KEY AUTOINCREMENT,
session_id TEXT UNIQUE NOT NULL,
uuid TEXT,
ip_address TEXT,
created_at DATETIME DEFAULT CURRENT_TIMESTAMP,
last_activity DATETIME DEFAULT CURRENT_TIMESTAMP,
FOREIGN KEY (uuid) REFERENCES sites(uuid)
);
CREATE INDEX IF NOT EXISTS idx_uuid ON sites(uuid);
CREATE INDEX IF NOT EXISTS idx_site_pages ON pages(site_id);
CREATE INDEX IF NOT EXISTS idx_session_id ON sessions(session_id);
CREATE INDEX IF NOT EXISTS idx_status ON sites(status);
`
_, err := db.conn.Exec(schema)
return err
}
func (db *DB) CreateSite(site *models.Site) (int, error) {
query := `
INSERT INTO sites (uuid, domain, url, max_depth, status, ip_address,
user_agent, browser, browser_version, os, device_type, session_id,
cookies, referrer, created_at)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
`
result, err := db.conn.Exec(query,
site.UUID, site.Domain, site.URL, site.MaxDepth, site.Status,
site.IPAddress, site.UserAgent, site.Browser, site.BrowserVersion,
site.OS, site.DeviceType, site.SessionID, site.Cookies, site.Referrer,
time.Now(),
)
if err != nil {
return 0, err
}
id, err := result.LastInsertId()
return int(id), err
}
func (db *DB) GetSiteByUUID(uuid string) (*models.Site, error) {
query := `
SELECT id, uuid, domain, url, max_depth, page_count, status,
ip_address, user_agent, browser, browser_version, os, device_type,
session_id, cookies, referrer, created_at, completed_at, last_crawled
FROM sites WHERE uuid = ?
`
site := &models.Site{}
err := db.conn.QueryRow(query, uuid).Scan(
&site.ID, &site.UUID, &site.Domain, &site.URL, &site.MaxDepth,
&site.PageCount, &site.Status, &site.IPAddress, &site.UserAgent,
&site.Browser, &site.BrowserVersion, &site.OS, &site.DeviceType,
&site.SessionID, &site.Cookies, &site.Referrer, &site.CreatedAt,
&site.CompletedAt, &site.LastCrawled,
)
if err == sql.ErrNoRows {
return nil, fmt.Errorf("site not found")
}
return site, err
}
func (db *DB) GetSiteByID(id int) (*models.Site, error) {
query := `
SELECT id, uuid, domain, url, max_depth, page_count, status,
ip_address, user_agent, browser, browser_version, os, device_type,
session_id, cookies, referrer, created_at, completed_at, last_crawled
FROM sites WHERE id = ?
`
site := &models.Site{}
err := db.conn.QueryRow(query, id).Scan(
&site.ID, &site.UUID, &site.Domain, &site.URL, &site.MaxDepth,
&site.PageCount, &site.Status, &site.IPAddress, &site.UserAgent,
&site.Browser, &site.BrowserVersion, &site.OS, &site.DeviceType,
&site.SessionID, &site.Cookies, &site.Referrer, &site.CreatedAt,
&site.CompletedAt, &site.LastCrawled,
)
if err == sql.ErrNoRows {
return nil, fmt.Errorf("site not found")
}
return site, err
}
func (db *DB) GetAllSites() ([]*models.Site, error) {
query := `
SELECT id, uuid, domain, url, max_depth, page_count, status,
ip_address, user_agent, browser, browser_version, os, device_type,
session_id, cookies, referrer, created_at, completed_at, last_crawled
FROM sites ORDER BY created_at DESC
`
rows, err := db.conn.Query(query)
if err != nil {
return nil, err
}
defer rows.Close()
sites := []*models.Site{}
for rows.Next() {
site := &models.Site{}
err := rows.Scan(
&site.ID, &site.UUID, &site.Domain, &site.URL, &site.MaxDepth,
&site.PageCount, &site.Status, &site.IPAddress, &site.UserAgent,
&site.Browser, &site.BrowserVersion, &site.OS, &site.DeviceType,
&site.SessionID, &site.Cookies, &site.Referrer, &site.CreatedAt,
&site.CompletedAt, &site.LastCrawled,
)
if err != nil {
return nil, err
}
sites = append(sites, site)
}
return sites, nil
}
func (db *DB) GetSitesByUUIDs(uuids []string) ([]*models.Site, error) {
if len(uuids) == 0 {
return []*models.Site{}, nil
}
// Create placeholders for the IN clause
placeholders := make([]string, len(uuids))
args := make([]interface{}, len(uuids))
for i, uuid := range uuids {
placeholders[i] = "?"
args[i] = uuid
}
query := fmt.Sprintf(`
SELECT id, uuid, domain, url, max_depth, page_count, status,
ip_address, user_agent, browser, browser_version, os, device_type,
session_id, cookies, referrer, created_at, completed_at, last_crawled
FROM sites WHERE uuid IN (%s) ORDER BY created_at DESC
`, strings.Join(placeholders, ","))
rows, err := db.conn.Query(query, args...)
if err != nil {
return nil, err
}
defer rows.Close()
sites := []*models.Site{}
for rows.Next() {
site := &models.Site{}
err := rows.Scan(
&site.ID, &site.UUID, &site.Domain, &site.URL, &site.MaxDepth,
&site.PageCount, &site.Status, &site.IPAddress, &site.UserAgent,
&site.Browser, &site.BrowserVersion, &site.OS, &site.DeviceType,
&site.SessionID, &site.Cookies, &site.Referrer, &site.CreatedAt,
&site.CompletedAt, &site.LastCrawled,
)
if err != nil {
return nil, err
}
sites = append(sites, site)
}
return sites, nil
}
func (db *DB) UpdateSiteStatus(uuid string, status string, pageCount int) error {
query := `
UPDATE sites
SET status = ?, page_count = ?, completed_at = ?, last_crawled = ?
WHERE uuid = ?
`
now := time.Now()
_, err := db.conn.Exec(query, status, pageCount, now, now, uuid)
return err
}
func (db *DB) DeleteSite(id int) error {
// Pages will be deleted automatically due to CASCADE
_, err := db.conn.Exec("DELETE FROM sites WHERE id = ?", id)
return err
}
func (db *DB) AddPage(page *models.Page) error {
query := `
INSERT OR IGNORE INTO pages (site_id, url, depth, last_modified, priority, change_freq)
VALUES (?, ?, ?, ?, ?, ?)
`
_, err := db.conn.Exec(query,
page.SiteID, page.URL, page.Depth, page.LastModified,
page.Priority, page.ChangeFreq,
)
return err
}
func (db *DB) GetPagesBySiteID(siteID int) ([]*models.Page, error) {
query := `
SELECT id, site_id, url, depth, last_modified, priority, change_freq
FROM pages WHERE site_id = ? ORDER BY depth, url
`
rows, err := db.conn.Query(query, siteID)
if err != nil {
return nil, err
}
defer rows.Close()
pages := []*models.Page{}
for rows.Next() {
page := &models.Page{}
err := rows.Scan(
&page.ID, &page.SiteID, &page.URL, &page.Depth,
&page.LastModified, &page.Priority, &page.ChangeFreq,
)
if err != nil {
return nil, err
}
pages = append(pages, page)
}
return pages, nil
}