false, "detected" => [], "errors" => [], "warnings" => [], "info" => [] ];

/*
 * Reject a missing or malformed URL before doing any network work.
 * FILTER_VALIDATE_URL accepts any scheme, so the allowed schemes are
 * restricted separately on the cURL handle below.
 */
if (!$url || !filter_var($url, FILTER_VALIDATE_URL)) {
    $response['errors'][] = "Valid URL required";
    echo json_encode($response);
    exit;
}

/* fetch html */
$ch = curl_init($url);
curl_setopt_array($ch, [
    CURLOPT_RETURNTRANSFER => true,
    CURLOPT_FOLLOWLOCATION => true,
    CURLOPT_TIMEOUT => 20,
    CURLOPT_USERAGENT => 'SchemaValidatorBot/1.0',
    CURLOPT_ENCODING => '', // gzip support
    // The URL is caller-supplied: only allow HTTP(S), both for the initial
    // request and for any redirect, so file://, gopher:// etc. are refused.
    CURLOPT_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
    CURLOPT_REDIR_PROTOCOLS => CURLPROTO_HTTP | CURLPROTO_HTTPS,
]);
$html = curl_exec($ch);
curl_close($ch);

// curl_exec() yields false on failure; an empty body is treated the same way.
if (!$html) {
    $response['errors'][] = "Failed to fetch URL";
    echo json_encode($response);
    exit;
}

/* extract JSON-LD */
// Capture the body of every <script ... type="application/ld+json" ...> tag.
// "s" lets "." span newlines, "i" makes the tag/attribute match case-insensitive.
preg_match_all(
    '/<script[^>]+type=["\']application\/ld\+json["\'][^>]*>(.*?)<\/script>/is',
    $html,
    $matches
);

if (empty($matches[1])) {
    $response['warnings'][] = "No schema found in initial HTML";

    // heuristic for SPA: markers commonly left by Next.js / React shells
    if (
        stripos($html, '__NEXT_DATA__') !== false ||
        stripos($html, 'id="root"') !== false ||
        stripos($html, 'data-reactroot') !== false
    ) {
        $response['info'][] = "This site appears to be client-side rendered. Schema may exist but cannot be validated using PHP-only.";
    }

    echo json_encode($response, JSON_PRETTY_PRINT);
    exit;
}

/* analyze schemas */
foreach ($matches[1] as $schema) {
    $json = json_decode($schema, true);
    if (json_last_error() !== JSON_ERROR_NONE) {
        $response['errors'][] = json_last_error_msg();
        continue;
    }

    // Visit the document root, the members of a top-level list, and the
    // members of any "@graph" container, in document order. Other nested
    // properties are deliberately not descended into.
    $queue = [$json];
    for ($i = 0; $i < count($queue); $i++) {
        $node = $queue[$i];
        if (!is_array($node)) {
            continue;
        }

        if (isset($node['@type'])) {
            // "@type" may be a single string or a list of strings; keep only
            // strings so array_unique() below compares scalars.
            foreach ((array) $node['@type'] as $type) {
                if (is_string($type)) {
                    $response['detected'][] = $type;
                }
            }
        } elseif (isset($node[0])) {
            foreach ($node as $item) {
                $queue[] = $item;
            }
        }

        if (isset($node['@graph']) && is_array($node['@graph'])) {
            foreach ($node['@graph'] as $item) {
                $queue[] = $item;
            }
        }
    }
}

// De-duplicate and re-index so the output is a JSON list, not an object.
$response['detected'] = array_values(array_unique($response['detected']));
// Success means every JSON-LD block parsed without a decode error.
$response['success'] = empty($response['errors']);

echo json_encode($response, JSON_PRETTY_PRINT);