} */ public function sheets(string $url): array { $spreadsheetId = $this->spreadsheetId($url); $currentGid = $this->gidFromUrl($url); $sheets = []; // Strategy 1: v3 worksheets JSON feed — works for publicly published sheets. // Each entry's alternate link href contains the numeric gid. if ($sheets === []) { try { $feed = $this->fetch( 'https://spreadsheets.google.com/feeds/worksheets/' . rawurlencode($spreadsheetId) . '/public/basic?alt=json' ); $sheets = $this->extractSheetsFromFeed($feed); } catch (\Throwable) {} } // Strategy 2: htmlview URL — serves rendered HTML with tab links for // sheets shared "anyone with the link can view". if ($sheets === []) { try { $html = $this->fetch( 'https://docs.google.com/spreadsheets/d/' . rawurlencode($spreadsheetId) . '/htmlview' ); $sheets = $this->extractSheets($html); } catch (\Throwable) {} } // Strategy 3: edit URL JS-bootstrapped data — last resort. if ($sheets === []) { try { $html = $this->fetch( 'https://docs.google.com/spreadsheets/d/' . rawurlencode($spreadsheetId) . '/edit?usp=sharing' ); $sheets = $this->extractSheets($html); } catch (\Throwable) {} } // Fallback: if we know the gid from the URL, return a labelled placeholder. if ($sheets === [] && $currentGid !== null) { $sheets[] = ['gid' => $currentGid, 'title' => 'Sheet ' . $currentGid]; } if ($sheets === []) { $sheets[] = ['gid' => '0', 'title' => 'First sheet']; } return ['id' => $spreadsheetId, 'sheets' => $sheets]; } /** * @return array{headers: list, rows: list>} */ public function rows(string $url, string $gid): array { $spreadsheetId = $this->spreadsheetId($url); $csv = $this->fetch(sprintf( 'https://docs.google.com/spreadsheets/d/%s/export?format=csv&gid=%s', rawurlencode($spreadsheetId), rawurlencode($gid) )); return $this->parseCsv($csv); } public function spreadsheetId(string $url): string { $parts = parse_url($url); $host = strtolower((string) ($parts['host'] ?? '')); if (!in_array($host, ['docs.google.com', 'spreadsheets.google.com'], true)) { throw new RuntimeException('Enter a valid Google Sheets URL.'); } $path = (string) ($parts['path'] ?? ''); if (preg_match('#/spreadsheets/d/([a-zA-Z0-9_-]+)#', $path, $matches) !== 1) { throw new RuntimeException('The Google Sheets URL does not include a spreadsheet id.'); } return $matches[1]; } // ── Sheet extraction ────────────────────────────────────────────────────── /** * Parse the v3 JSON feed response. * * @return list */ private function extractSheetsFromFeed(string $json): array { $data = json_decode($json, true); if (!is_array($data) || !isset($data['feed']['entry'])) { return []; } $sheets = []; foreach ((array) $data['feed']['entry'] as $entry) { $title = (string) ($entry['title']['$t'] ?? ''); if ($title === '') { continue; } // GID is embedded in the rel="alternate" link href as #gid=NNN or &gid=NNN $gid = null; foreach ((array) ($entry['link'] ?? []) as $link) { if (preg_match('/[#&]gid=(\d+)/', (string) ($link['href'] ?? ''), $m)) { $gid = $m[1]; break; } } if ($gid !== null && $this->looksLikeSheet($gid, $title) && !isset($sheets[$gid])) { $sheets[$gid] = ['gid' => $gid, 'title' => $title]; } } return array_values($sheets); } /** * Parse HTML from htmlview or edit URL for sheet tab data. * * @return list */ private function extractSheets(string $html): array { $sheets = []; // ── HTML tab patterns (htmlview format) ─────────────────────────────── // Google renders tab links like: // Sheet Name // Sheet Name $htmlPatterns = [ '/<[^>]+href=["\'][^"\']*[#&]gid=(\d+)["\'][^>]*>\s*(?:<[^>]+>\s*)*([^<]{1,100}?)\s*(?:<|$)/i', '/data-id=["\'](\d+)["\'][^>]*>\s*([^<]{1,100}?)\s* 0) { foreach ($matches as $match) { $gid = $match[1]; $title = trim(html_entity_decode($match[2], ENT_QUOTES | ENT_HTML5, 'UTF-8')); if ($this->looksLikeSheet($gid, $title) && !isset($sheets[$gid])) { $sheets[$gid] = ['gid' => $gid, 'title' => $title]; } } } } if (!empty($sheets)) { return array_values($sheets); } // ── JavaScript JSON patterns (edit URL bootstrapped data) ───────────── // Distance increased to 600 chars to handle larger embedded JSON objects. $jsPatterns = [ '/"gid"\s*:\s*(\d+).{0,600}?"name"\s*:\s*"((?:\\\\.|[^"\\\\])+)"/s', '/"name"\s*:\s*"((?:\\\\.|[^"\\\\])+)".{0,600}?"gid"\s*:\s*(\d+)/s', '/"gid"\s*:\s*(\d+).{0,600}?"title"\s*:\s*"((?:\\\\.|[^"\\\\])+)"/s', '/"title"\s*:\s*"((?:\\\\.|[^"\\\\])+)".{0,600}?"gid"\s*:\s*(\d+)/s', '/\[\s*(\d+)\s*,\s*"((?:\\\\.|[^"\\\\])+)"/s', ]; foreach ($jsPatterns as $pattern) { if (preg_match_all($pattern, $html, $matches, PREG_SET_ORDER) > 0) { foreach ($matches as $match) { $first = (string) $match[1]; $second = (string) $match[2]; $gid = ctype_digit($first) ? $first : $second; $title = ctype_digit($first) ? $second : $first; $title = $this->decodeJsString($title); if (!$this->looksLikeSheet($gid, $title) || isset($sheets[$gid])) { continue; } $sheets[$gid] = ['gid' => $gid, 'title' => $title]; } if (!empty($sheets)) { break; } } } return array_values($sheets); } private function looksLikeSheet(string $gid, string $title): bool { if ($gid === '' || !ctype_digit($gid) || $title === '' || strlen($title) > 120) { return false; } return !str_contains($title, '<') && !str_contains($title, '{') && !str_contains(strtolower($title), 'http'); } private function decodeJsString(string $value): string { $decoded = json_decode('"' . str_replace('"', '\\"', $value) . '"', true); return is_string($decoded) ? $decoded : stripcslashes($value); } private function gidFromUrl(string $url): ?string { $fragment = parse_url($url, PHP_URL_FRAGMENT); $query = parse_url($url, PHP_URL_QUERY); foreach ([(string) $fragment, (string) $query] as $part) { if (preg_match('/(?:^|&)gid=(\d+)/', $part, $matches) === 1) { return $matches[1]; } } return null; } // ── HTTP fetch ──────────────────────────────────────────────────────────── private function fetch(string $url): string { $context = stream_context_create([ 'http' => [ 'method' => 'GET', 'timeout' => 15, 'ignore_errors' => true, 'follow_location' => true, 'max_redirects' => 5, 'header' => implode("\r\n", [ 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36', 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 'Accept-Language: en-US,en;q=0.5', ]) . "\r\n", ], ]); $content = @file_get_contents($url, false, $context); if (!is_string($content) || trim($content) === '') { throw new RuntimeException( 'Could not reach the Google Sheet. Check the URL and make sure the sheet is shared as "Anyone with the link can view".' ); } // Google returns a sign-in page when the sheet requires authentication. // Detect this by looking for the login shell markers present in every // Google auth redirect (~9 KB of CSS/JS with no actual sheet data). if ( str_contains($content, '.login,.request-storage-access') || str_contains($content, 'ServiceLogin') || str_contains($content, 'accounts.google.com/ServiceLogin') ) { throw new RuntimeException( 'Google returned a sign-in page. The spreadsheet must be shared as "Anyone with the link can view": ' . 'open the sheet → File → Share → Change to "Anyone with the link" → Viewer.' ); } return $content; } // ── CSV parsing ─────────────────────────────────────────────────────────── /** * @return array{headers: list, rows: list>} */ private function parseCsv(string $csv): array { $handle = fopen('php://temp', 'r+'); if ($handle === false) { throw new RuntimeException('Unable to parse sheet data.'); } fwrite($handle, $csv); rewind($handle); $headers = fgetcsv($handle); if ($headers === false) { fclose($handle); throw new RuntimeException('The selected sheet is empty.'); } $headers = array_map( static fn($h): string => trim((string) $h, " \t\n\r\0\x0B\xEF\xBB\xBF"), $headers ); $rows = []; while (($values = fgetcsv($handle)) !== false) { $row = []; $hasValue = false; foreach ($headers as $index => $header) { if ($header === '') { continue; } $value = trim((string) ($values[$index] ?? '')); $row[$header] = $value; $hasValue = $hasValue || $value !== ''; } if ($hasValue) { $rows[] = $row; } } fclose($handle); return ['headers' => $headers, 'rows' => $rows]; } }