88 lines
2.0 KiB
PHP
88 lines
2.0 KiB
PHP
<?php

/**
 * JSON-LD schema detector endpoint.
 *
 * Request:  JSON body of the form {"url": "<http(s) URL>"}.
 * Response: JSON object with the keys
 *   - success  (bool)     true when no errors were recorded
 *   - detected (string[]) unique "@type" values found in JSON-LD blocks
 *   - errors   (string[]) fatal problems (bad URL, fetch failure, bad JSON)
 *   - warnings (string[]) non-fatal findings
 *   - info     (string[]) hints, e.g. client-side rendering detected
 *
 * Only the HTML returned by the server is inspected; scripts are not
 * executed, so schema injected by JavaScript at runtime is not visible.
 */

header('Content-Type: application/json; charset=utf-8');

/**
 * Emit the response as JSON and terminate the script.
 *
 * Every exit path goes through here so the encoding flags are identical
 * for success and error responses.
 *
 * @param array $response Response structure described in the file header.
 * @return void
 */
function schemaValidatorRespond(array $response)
{
    echo json_encode($response, JSON_PRETTY_PRINT);
    exit;
}

/**
 * Collect "@type" values from one decoded JSON-LD value.
 *
 * Handles three shapes:
 *   - a node with "@type" as a string or as a list of strings,
 *   - a top-level list of nodes,
 *   - a node carrying a "@graph" list of nodes.
 * Nested property values (e.g. "author") are intentionally not walked.
 *
 * @param mixed $node Result of json_decode(..., true) or a child of it.
 * @return string[] Type names in document order; may contain duplicates.
 */
function schemaValidatorExtractTypes($node): array
{
    if (!is_array($node)) {
        return [];
    }

    $types = [];

    if (isset($node['@type'])) {
        // "@type" may be a single string or a list; the cast covers both.
        foreach ((array) $node['@type'] as $type) {
            if (is_string($type)) {
                $types[] = $type;
            }
        }
    } elseif (isset($node[0])) {
        foreach ($node as $item) {
            $types = array_merge($types, schemaValidatorExtractTypes($item));
        }
    }

    if (isset($node['@graph']) && is_array($node['@graph'])) {
        foreach ($node['@graph'] as $child) {
            $types = array_merge($types, schemaValidatorExtractTypes($child));
        }
    }

    return $types;
}

$response = [
    'success' => false,
    'detected' => [],
    'errors' => [],
    'warnings' => [],
    'info' => [],
];

/* read and validate input */

$input = json_decode(file_get_contents('php://input'), true);
$url = (is_array($input) && isset($input['url']) && is_string($input['url']))
    ? $input['url']
    : null;

$isValidUrl = $url !== null && filter_var($url, FILTER_VALIDATE_URL) !== false;

if ($isValidUrl) {
    // FILTER_VALIDATE_URL also accepts schemes such as file: or gopher:,
    // so the scheme is checked explicitly before anything is fetched.
    $scheme = strtolower((string) parse_url($url, PHP_URL_SCHEME));
    $isValidUrl = in_array($scheme, ['http', 'https'], true);
}

if (!$isValidUrl) {
    $response['errors'][] = "Valid URL required";
    schemaValidatorRespond($response);
}

/* fetch html */

// NOTE(review): requests to internal/private hosts are not blocked here.
// If this endpoint is reachable by untrusted clients, add host filtering.
$ch = curl_init($url);
curl_setopt_array($ch, [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_FOLLOWLOCATION => true,
    CURLOPT_MAXREDIRS => 5,
    // Keep both the initial request and any redirect on HTTP(S).
    CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
    CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
    CURLOPT_CONNECTTIMEOUT => 10,
    CURLOPT_TIMEOUT => 20,
    CURLOPT_USERAGENT => 'SchemaValidatorBot/1.0',
    CURLOPT_ENCODING => '', // accept any encoding cURL supports (gzip etc.)
]);

$html = curl_exec($ch);
curl_close($ch);

if ($html === false || $html === '') {
    $response['errors'][] = "Failed to fetch URL";
    schemaValidatorRespond($response);
}

/* extract JSON-LD */

$matchCount = preg_match_all(
    '/<script[^>]+type=["\']application\/ld\+json["\'][^>]*>(.*?)<\/script>/is',
    $html,
    $matches
);

if ($matchCount === false) {
    // A PCRE failure (e.g. backtrack limit on a very large page) must not
    // be reported as "no schema found".
    $response['errors'][] =
        'Failed to scan HTML for JSON-LD (PCRE error code ' . preg_last_error() . ')';
    schemaValidatorRespond($response);
}

if (empty($matches[1])) {
    $response['warnings'][] = "No schema found in initial HTML";

    // heuristic for SPA: markers commonly left by Next.js / React apps
    $looksClientRendered =
        stripos($html, '__NEXT_DATA__') !== false
        || stripos($html, 'id="root"') !== false
        || stripos($html, 'data-reactroot') !== false;

    if ($looksClientRendered) {
        $response['info'][] =
            "This site appears to be client-side rendered. Schema may exist but cannot be validated using PHP-only.";
    }

    schemaValidatorRespond($response);
}

/* analyze schemas */

foreach ($matches[1] as $schema) {
    $json = json_decode($schema, true);

    if (json_last_error() !== JSON_ERROR_NONE) {
        $response['errors'][] = json_last_error_msg();
        continue;
    }

    $response['detected'] = array_merge(
        $response['detected'],
        schemaValidatorExtractTypes($json)
    );
}

$response['detected'] = array_values(array_unique($response['detected']));
$response['success'] = empty($response['errors']);

schemaValidatorRespond($response);