<?php
/*
 * Schema validator endpoint.
 *
 * Fetches the page at a caller-supplied URL, extracts its JSON-LD
 * (<script type="application/ld+json">) blocks and reports the "@type"
 * values found. The result is printed as a JSON document with the keys
 * success / detected / errors / warnings / info, after which the script exits.
 *
 * Provenance: recovered from a flattened git patch — commit
 * 397522edcb109b6d1bf42380ba50912f5f982314, "update schema validator",
 * Subhodip Ghosh, Tue, 27 Jan 2026 11:43:43 +0530 (4 files changed,
 * 118 insertions, 3 deletions). This code appears to be the newly created
 * .hta_slug/schema-validator.php. The patch also touched .hta_config/conf.php,
 * .hta_slug/dns-tools-get-a-record.php and .hta_slug/location-ip-to-location.php;
 * those hunks did not survive in the recovered text and are not reproduced here.
 */

// NOTE(review): the statements that originally assigned $url are missing from
// the recovered text. Reading the "url" request parameter is an assumption —
// confirm against the router / caller before relying on it.
if (!isset($url)) {
    if (isset($_GET['url'])) {
        $url = $_GET['url'];
    } elseif (isset($_POST['url'])) {
        $url = $_POST['url'];
    } else {
        $url = null;
    }
}

// NOTE(review): only the tail of this initialiser survived; the "success" key
// is inferred from the assignment at the end of the script — confirm.
$response = [
    'success'  => false,
    'detected' => [],
    'errors'   => [],
    'warnings' => [],
    'info'     => [],
];

/*
 * Helpers are closures rather than named functions so that including this
 * file adds nothing to the global function table.
 */

// Print the response as pretty-printed JSON and terminate the script.
// Every exit path goes through here so the output format is always the same.
$sendResponse = function (array $payload) {
    if (!headers_sent()) {
        header('Content-Type: application/json; charset=utf-8');
    }
    echo json_encode($payload, JSON_PRETTY_PRINT);
    exit;
};

// Collect every string "@type" reachable from one decoded JSON-LD document.
// Handles a single node, a top-level list of nodes, array-valued "@type"
// (e.g. ["Article", "NewsArticle"]) and nodes nested under "@graph".
$collectTypes = function ($decoded) {
    $types = [];
    $queue = [$decoded];

    while (count($queue) > 0) {
        $node = array_shift($queue);
        if (!is_array($node)) {
            continue; // scalars carry no type information
        }

        if (isset($node['@type'])) {
            // (array) turns a single string into a one-element list.
            foreach ((array) $node['@type'] as $type) {
                if (is_string($type) && $type !== '') {
                    $types[] = $type;
                }
            }
        }

        if (isset($node['@graph'])) {
            // Queued as a node itself: works whether it is a list or one object.
            $queue[] = $node['@graph'];
        }

        if (isset($node[0])) {
            // A list of nodes: inspect each element in order.
            foreach ($node as $child) {
                $queue[] = $child;
            }
        }
    }

    return $types;
};

/* validate input */
// FILTER_VALIDATE_URL accepts any scheme (file://, gopher://, ...), so the
// scheme is checked separately and only http/https is let through.
$scheme = is_string($url) ? strtolower((string) parse_url($url, PHP_URL_SCHEME)) : '';

if (
    !is_string($url) ||
    filter_var($url, FILTER_VALIDATE_URL) === false ||
    !in_array($scheme, ['http', 'https'], true)
) {
    $response['errors'][] = "Valid URL required";
    $sendResponse($response);
}

/* fetch html */
// NOTE(review): the URL is untrusted input. The protocol restrictions below
// stop non-HTTP fetches and redirects, but hosts on the internal network are
// still reachable; add an address allow/deny list if that matters here.
$ch = curl_init($url);
if ($ch === false) {
    $response['errors'][] = "Failed to fetch URL";
    $sendResponse($response);
}

curl_setopt_array($ch, [
    CURLOPT_RETURNTRANSFER  => true,
    CURLOPT_FOLLOWLOCATION  => true,
    CURLOPT_TIMEOUT         => 20,
    CURLOPT_USERAGENT       => 'SchemaValidatorBot/1.0',
    CURLOPT_ENCODING        => '', // accept every encoding cURL supports (gzip etc.)
    CURLOPT_PROTOCOLS       => CURLPROTO_HTTP | CURLPROTO_HTTPS,
    CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
]);

$html = curl_exec($ch);
curl_close($ch);

// Explicit comparison: a body consisting of "0" is not a failed fetch.
if ($html === false || $html === '') {
    $response['errors'][] = "Failed to fetch URL";
    $sendResponse($response);
}

/* extract JSON-LD */
// Group 1 is the optional quote around the type value, group 2 is the script
// body. The leading "<script[^>" had been stripped from the recovered pattern,
// which left it unable to match any tag.
$found = preg_match_all(
    '/<script\b[^>]*?\stype\s*=\s*(["\']?)application\/ld\+json\1[^>]*>(.*?)<\/script\s*>/is',
    $html,
    $matches
);

if ($found === false) {
    // A PCRE failure (e.g. backtrack limit on a huge page) is not "no schema".
    $response['errors'][] = 'Failed to scan HTML for JSON-LD (PCRE error ' . preg_last_error() . ')';
    $sendResponse($response);
}

if (empty($matches[2])) {
    $response['warnings'][] = "No schema found in initial HTML";

    // heuristic for SPA: markers commonly left by Next.js / React shells
    if (
        stripos($html, '__NEXT_DATA__') !== false ||
        stripos($html, 'id="root"') !== false ||
        stripos($html, 'data-reactroot') !== false
    ) {
        // The line break inside the message is kept exactly as it appears in
        // the recovered source.
        $response['info'][] =
            "This site appears to be client-side rendered. \nSchema may exist but cannot be validated using PHP-only.";
    }

    $sendResponse($response);
}

/* analyze schemas */
foreach ($matches[2] as $block) {
    $decoded = json_decode($block, true);

    if (json_last_error() !== JSON_ERROR_NONE) {
        // Report the parse error for this block and keep going with the rest.
        $response['errors'][] = json_last_error_msg();
        continue;
    }

    foreach ($collectTypes($decoded) as $type) {
        $response['detected'][] = $type;
    }
}

$response['detected'] = array_values(array_unique($response['detected']));
$response['success'] = empty($response['errors']);

$sendResponse($response);