update schema validator
This commit is contained in:
87
.hta_slug/schema-validator.php
Normal file
87
.hta_slug/schema-validator.php
Normal file
@@ -0,0 +1,87 @@
|
||||
<?php

declare(strict_types=1);

/**
 * JSON-LD schema detector endpoint.
 *
 * Request:  JSON body of the form {"url": "<page to inspect>"}.
 * Response: JSON object with the keys
 *   - success  (bool)          true when no errors were recorded
 *   - detected (list<string>)  unique @type values found in JSON-LD blocks
 *   - errors   (list<string>)  fatal problems (bad input, fetch or parse failures)
 *   - warnings (list<string>)  non-fatal findings
 *   - info     (list<string>)  hints, e.g. the page looks client-side rendered
 *
 * Only the HTML returned by the server is inspected; schema injected by
 * JavaScript at runtime is not visible to this script.
 */

/**
 * Emit the response as JSON.
 *
 * @param array<string, mixed> $response Response payload.
 * @param int                  $flags    json_encode() flags.
 */
function sendResponse(array $response, int $flags = 0): void
{
    echo json_encode($response, $flags);
}

/**
 * Collect the @type values of the top-level entities in a decoded JSON-LD value.
 *
 * Handles a single object, a list of objects, and objects that wrap their
 * entities in "@graph". "@type" may be a string or a list of strings.
 * Nested properties (e.g. an "author" object) are intentionally not descended.
 *
 * @param mixed $node Decoded JSON-LD value.
 *
 * @return list<string>
 */
function collectSchemaTypes($node): array
{
    if (!is_array($node)) {
        return [];
    }

    $types = [];

    if (isset($node['@type'])) {
        // Flatten array-valued @type so the result stays a flat list of
        // strings that array_unique() can compare safely.
        foreach ((array) $node['@type'] as $type) {
            if (is_string($type)) {
                $types[] = $type;
            }
        }
    }

    if (isset($node['@graph']) && is_array($node['@graph'])) {
        foreach ($node['@graph'] as $entity) {
            $types = array_merge($types, collectSchemaTypes($entity));
        }
    }

    // A top-level JSON array of entities.
    if (isset($node[0])) {
        foreach ($node as $entity) {
            $types = array_merge($types, collectSchemaTypes($entity));
        }
    }

    return $types;
}

header('Content-Type: application/json; charset=utf-8');

// file_get_contents() returns false on failure; cast so json_decode() always
// receives a string.
$input = json_decode((string) file_get_contents('php://input'), true);
$url = is_array($input) ? ($input['url'] ?? null) : null;

$response = [
    'success' => false,
    'detected' => [],
    'errors' => [],
    'warnings' => [],
    'info' => [],
];

if (!is_string($url) || $url === '' || filter_var($url, FILTER_VALIDATE_URL) === false) {
    $response['errors'][] = 'Valid URL required';
    sendResponse($response);
    exit;
}

// FILTER_VALIDATE_URL accepts any scheme (file://, gopher://, ...). This
// endpoint fetches a caller-supplied URL, so restrict it to web schemes.
$scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));
if (!in_array($scheme, ['http', 'https'], true)) {
    $response['errors'][] = 'Only http and https URLs are supported';
    sendResponse($response);
    exit;
}

/* fetch html */
// NOTE(review): the target host is still caller-controlled, so requests to
// internal addresses (localhost, cloud metadata endpoints) remain possible.
// Add host/IP allow- or deny-listing if this endpoint is publicly reachable.
$ch = curl_init($url);
if ($ch === false) {
    $response['errors'][] = 'Failed to fetch URL';
    sendResponse($response);
    exit;
}

curl_setopt_array($ch, [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_FOLLOWLOCATION => true,
    // Cap redirects and keep them on http/https as well, so a redirect
    // cannot switch to another protocol handler.
    CURLOPT_MAXREDIRS => 5,
    CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
    CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
    CURLOPT_TIMEOUT => 20,
    CURLOPT_USERAGENT => 'SchemaValidatorBot/1.0',
    CURLOPT_ENCODING => '', // empty string = accept every encoding cURL supports (gzip, ...)
]);

$html = curl_exec($ch);
curl_close($ch);

if (!is_string($html) || $html === '') {
    $response['errors'][] = 'Failed to fetch URL';
    sendResponse($response);
    exit;
}

/* extract JSON-LD */
$matches = [];
$matchCount = preg_match_all(
    '/<script[^>]+type=["\']application\/ld\+json["\'][^>]*>(.*?)<\/script>/is',
    $html,
    $matches
);

if ($matchCount === false) {
    // PCRE failure (e.g. backtrack limit on a very large page) is not the
    // same thing as "no schema present"; report it as an error.
    $response['errors'][] = 'Failed to scan HTML for JSON-LD';
    sendResponse($response);
    exit;
}

if (empty($matches[1])) {
    $response['warnings'][] = 'No schema found in initial HTML';

    // Heuristic for single-page apps: these markers are looked for in the
    // fetched HTML to guess that the page is rendered client-side.
    $looksClientRendered = stripos($html, '__NEXT_DATA__') !== false
        || stripos($html, 'id="root"') !== false
        || stripos($html, 'data-reactroot') !== false;

    if ($looksClientRendered) {
        $response['info'][] =
            'This site appears to be client-side rendered. Schema may exist but cannot be validated using PHP-only.';
    }

    sendResponse($response, JSON_PRETTY_PRINT);
    exit;
}

/* analyze schemas */
foreach ($matches[1] as $schema) {
    $json = json_decode($schema, true);

    if (json_last_error() !== JSON_ERROR_NONE) {
        // Record the parse error and keep going so one broken block does
        // not hide the valid ones.
        $response['errors'][] = json_last_error_msg();
        continue;
    }

    $response['detected'] = array_merge($response['detected'], collectSchemaTypes($json));
}

$response['detected'] = array_values(array_unique($response['detected']));
$response['success'] = empty($response['errors']);

sendResponse($response, JSON_PRETTY_PRINT);
|
||||
Reference in New Issue
Block a user