選択できるのは25トピックまでです。 トピックは、先頭が英数字で、英数字とダッシュ('-')を使用した35文字以内のものにしてください。

336 行
12KB

  1. <?php
  2. declare(strict_types=1);
  3. namespace App\Services;
  4. use RuntimeException;
  5. class GoogleSheetImportService
  6. {
  7. /**
  8. * @return array{id: string, sheets: list<array{gid: string, title: string}>}
  9. */
  10. public function sheets(string $url): array
  11. {
  12. $spreadsheetId = $this->spreadsheetId($url);
  13. $currentGid = $this->gidFromUrl($url);
  14. $sheets = [];
  15. // Strategy 1: v3 worksheets JSON feed — works for publicly published sheets.
  16. // Each entry's alternate link href contains the numeric gid.
  17. if ($sheets === []) {
  18. try {
  19. $feed = $this->fetch(
  20. 'https://spreadsheets.google.com/feeds/worksheets/'
  21. . rawurlencode($spreadsheetId) . '/public/basic?alt=json'
  22. );
  23. $sheets = $this->extractSheetsFromFeed($feed);
  24. } catch (\Throwable) {}
  25. }
  26. // Strategy 2: htmlview URL — serves rendered HTML with tab links for
  27. // sheets shared "anyone with the link can view".
  28. if ($sheets === []) {
  29. try {
  30. $html = $this->fetch(
  31. 'https://docs.google.com/spreadsheets/d/'
  32. . rawurlencode($spreadsheetId) . '/htmlview'
  33. );
  34. $sheets = $this->extractSheets($html);
  35. } catch (\Throwable) {}
  36. }
  37. // Strategy 3: edit URL JS-bootstrapped data — last resort.
  38. if ($sheets === []) {
  39. try {
  40. $html = $this->fetch(
  41. 'https://docs.google.com/spreadsheets/d/'
  42. . rawurlencode($spreadsheetId) . '/edit?usp=sharing'
  43. );
  44. $sheets = $this->extractSheets($html);
  45. } catch (\Throwable) {}
  46. }
  47. // Fallback: if we know the gid from the URL, return a labelled placeholder.
  48. if ($sheets === [] && $currentGid !== null) {
  49. $sheets[] = ['gid' => $currentGid, 'title' => 'Sheet ' . $currentGid];
  50. }
  51. if ($sheets === []) {
  52. $sheets[] = ['gid' => '0', 'title' => 'First sheet'];
  53. }
  54. return ['id' => $spreadsheetId, 'sheets' => $sheets];
  55. }
  56. /**
  57. * @return array{headers: list<string>, rows: list<array<string, string>>}
  58. */
  59. public function rows(string $url, string $gid): array
  60. {
  61. $spreadsheetId = $this->spreadsheetId($url);
  62. $csv = $this->fetch(sprintf(
  63. 'https://docs.google.com/spreadsheets/d/%s/export?format=csv&gid=%s',
  64. rawurlencode($spreadsheetId),
  65. rawurlencode($gid)
  66. ));
  67. return $this->parseCsv($csv);
  68. }
  69. public function spreadsheetId(string $url): string
  70. {
  71. $parts = parse_url($url);
  72. $host = strtolower((string) ($parts['host'] ?? ''));
  73. if (!in_array($host, ['docs.google.com', 'spreadsheets.google.com'], true)) {
  74. throw new RuntimeException('Enter a valid Google Sheets URL.');
  75. }
  76. $path = (string) ($parts['path'] ?? '');
  77. if (preg_match('#/spreadsheets/d/([a-zA-Z0-9_-]+)#', $path, $matches) !== 1) {
  78. throw new RuntimeException('The Google Sheets URL does not include a spreadsheet id.');
  79. }
  80. return $matches[1];
  81. }
  82. // ── Sheet extraction ──────────────────────────────────────────────────────
  83. /**
  84. * Parse the v3 JSON feed response.
  85. *
  86. * @return list<array{gid: string, title: string}>
  87. */
  88. private function extractSheetsFromFeed(string $json): array
  89. {
  90. $data = json_decode($json, true);
  91. if (!is_array($data) || !isset($data['feed']['entry'])) {
  92. return [];
  93. }
  94. $sheets = [];
  95. foreach ((array) $data['feed']['entry'] as $entry) {
  96. $title = (string) ($entry['title']['$t'] ?? '');
  97. if ($title === '') {
  98. continue;
  99. }
  100. // GID is embedded in the rel="alternate" link href as #gid=NNN or &gid=NNN
  101. $gid = null;
  102. foreach ((array) ($entry['link'] ?? []) as $link) {
  103. if (preg_match('/[#&]gid=(\d+)/', (string) ($link['href'] ?? ''), $m)) {
  104. $gid = $m[1];
  105. break;
  106. }
  107. }
  108. if ($gid !== null && $this->looksLikeSheet($gid, $title) && !isset($sheets[$gid])) {
  109. $sheets[$gid] = ['gid' => $gid, 'title' => $title];
  110. }
  111. }
  112. return array_values($sheets);
  113. }
  114. /**
  115. * Parse HTML from htmlview or edit URL for sheet tab data.
  116. *
  117. * @return list<array{gid: string, title: string}>
  118. */
  119. private function extractSheets(string $html): array
  120. {
  121. $sheets = [];
  122. // ── HTML tab patterns (htmlview format) ───────────────────────────────
  123. // Google renders tab links like:
  124. // <a href="#gid=123">Sheet Name</a>
  125. // <span data-id="123">Sheet Name</span>
  126. $htmlPatterns = [
  127. '/<[^>]+href=["\'][^"\']*[#&]gid=(\d+)["\'][^>]*>\s*(?:<[^>]+>\s*)*([^<]{1,100}?)\s*(?:<|$)/i',
  128. '/data-id=["\'](\d+)["\'][^>]*>\s*([^<]{1,100}?)\s*</i',
  129. ];
  130. foreach ($htmlPatterns as $pattern) {
  131. if (preg_match_all($pattern, $html, $matches, PREG_SET_ORDER) > 0) {
  132. foreach ($matches as $match) {
  133. $gid = $match[1];
  134. $title = trim(html_entity_decode($match[2], ENT_QUOTES | ENT_HTML5, 'UTF-8'));
  135. if ($this->looksLikeSheet($gid, $title) && !isset($sheets[$gid])) {
  136. $sheets[$gid] = ['gid' => $gid, 'title' => $title];
  137. }
  138. }
  139. }
  140. }
  141. if (!empty($sheets)) {
  142. return array_values($sheets);
  143. }
  144. // ── JavaScript JSON patterns (edit URL bootstrapped data) ─────────────
  145. // Distance increased to 600 chars to handle larger embedded JSON objects.
  146. $jsPatterns = [
  147. '/"gid"\s*:\s*(\d+).{0,600}?"name"\s*:\s*"((?:\\\\.|[^"\\\\])+)"/s',
  148. '/"name"\s*:\s*"((?:\\\\.|[^"\\\\])+)".{0,600}?"gid"\s*:\s*(\d+)/s',
  149. '/"gid"\s*:\s*(\d+).{0,600}?"title"\s*:\s*"((?:\\\\.|[^"\\\\])+)"/s',
  150. '/"title"\s*:\s*"((?:\\\\.|[^"\\\\])+)".{0,600}?"gid"\s*:\s*(\d+)/s',
  151. '/\[\s*(\d+)\s*,\s*"((?:\\\\.|[^"\\\\])+)"/s',
  152. ];
  153. foreach ($jsPatterns as $pattern) {
  154. if (preg_match_all($pattern, $html, $matches, PREG_SET_ORDER) > 0) {
  155. foreach ($matches as $match) {
  156. $first = (string) $match[1];
  157. $second = (string) $match[2];
  158. $gid = ctype_digit($first) ? $first : $second;
  159. $title = ctype_digit($first) ? $second : $first;
  160. $title = $this->decodeJsString($title);
  161. if (!$this->looksLikeSheet($gid, $title) || isset($sheets[$gid])) {
  162. continue;
  163. }
  164. $sheets[$gid] = ['gid' => $gid, 'title' => $title];
  165. }
  166. if (!empty($sheets)) {
  167. break;
  168. }
  169. }
  170. }
  171. return array_values($sheets);
  172. }
  173. private function looksLikeSheet(string $gid, string $title): bool
  174. {
  175. if ($gid === '' || !ctype_digit($gid) || $title === '' || strlen($title) > 120) {
  176. return false;
  177. }
  178. return !str_contains($title, '<')
  179. && !str_contains($title, '{')
  180. && !str_contains(strtolower($title), 'http');
  181. }
  182. private function decodeJsString(string $value): string
  183. {
  184. $decoded = json_decode('"' . str_replace('"', '\\"', $value) . '"', true);
  185. return is_string($decoded) ? $decoded : stripcslashes($value);
  186. }
  187. private function gidFromUrl(string $url): ?string
  188. {
  189. $fragment = parse_url($url, PHP_URL_FRAGMENT);
  190. $query = parse_url($url, PHP_URL_QUERY);
  191. foreach ([(string) $fragment, (string) $query] as $part) {
  192. if (preg_match('/(?:^|&)gid=(\d+)/', $part, $matches) === 1) {
  193. return $matches[1];
  194. }
  195. }
  196. return null;
  197. }
  198. // ── HTTP fetch ────────────────────────────────────────────────────────────
  199. private function fetch(string $url): string
  200. {
  201. $context = stream_context_create([
  202. 'http' => [
  203. 'method' => 'GET',
  204. 'timeout' => 15,
  205. 'ignore_errors' => true,
  206. 'follow_location' => true,
  207. 'max_redirects' => 5,
  208. 'header' => implode("\r\n", [
  209. 'User-Agent: Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/124.0.0.0 Safari/537.36',
  210. 'Accept: text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
  211. 'Accept-Language: en-US,en;q=0.5',
  212. ]) . "\r\n",
  213. ],
  214. ]);
  215. $content = @file_get_contents($url, false, $context);
  216. if (!is_string($content) || trim($content) === '') {
  217. throw new RuntimeException(
  218. 'Could not reach the Google Sheet. Check the URL and make sure the sheet is shared as "Anyone with the link can view".'
  219. );
  220. }
  221. // Google returns a sign-in page when the sheet requires authentication.
  222. // Detect this by looking for the login shell markers present in every
  223. // Google auth redirect (~9 KB of CSS/JS with no actual sheet data).
  224. if (
  225. str_contains($content, '.login,.request-storage-access') ||
  226. str_contains($content, 'ServiceLogin') ||
  227. str_contains($content, 'accounts.google.com/ServiceLogin')
  228. ) {
  229. throw new RuntimeException(
  230. 'Google returned a sign-in page. The spreadsheet must be shared as "Anyone with the link can view": '
  231. . 'open the sheet → File → Share → Change to "Anyone with the link" → Viewer.'
  232. );
  233. }
  234. return $content;
  235. }
  236. // ── CSV parsing ───────────────────────────────────────────────────────────
  237. /**
  238. * @return array{headers: list<string>, rows: list<array<string, string>>}
  239. */
  240. private function parseCsv(string $csv): array
  241. {
  242. $handle = fopen('php://temp', 'r+');
  243. if ($handle === false) {
  244. throw new RuntimeException('Unable to parse sheet data.');
  245. }
  246. fwrite($handle, $csv);
  247. rewind($handle);
  248. $headers = fgetcsv($handle);
  249. if ($headers === false) {
  250. fclose($handle);
  251. throw new RuntimeException('The selected sheet is empty.');
  252. }
  253. $headers = array_map(
  254. static fn($h): string => trim((string) $h, " \t\n\r\0\x0B\xEF\xBB\xBF"),
  255. $headers
  256. );
  257. $rows = [];
  258. while (($values = fgetcsv($handle)) !== false) {
  259. $row = [];
  260. $hasValue = false;
  261. foreach ($headers as $index => $header) {
  262. if ($header === '') {
  263. continue;
  264. }
  265. $value = trim((string) ($values[$index] ?? ''));
  266. $row[$header] = $value;
  267. $hasValue = $hasValue || $value !== '';
  268. }
  269. if ($hasValue) {
  270. $rows[] = $row;
  271. }
  272. }
  273. fclose($handle);
  274. return ['headers' => $headers, 'rows' => $rows];
  275. }
  276. }

Powered by TurnKey Linux.