You can not select more than 25 topics Topics must start with a letter or number, can include dashes ('-') and can be up to 35 characters long.

386 lines
12KB

  1. <?php
  2. declare(strict_types=1);
  3. namespace App\Services;
  4. use RuntimeException;
  /**
   * Reads job row data from an uploaded CSV or Excel (.xlsx) file.
   * No external libraries — CSV uses fgetcsv, xlsx uses ZipArchive + SimpleXML.
   *
   * Uploaded files are kept under a private directory inside the system temp
   * dir and are addressed by the random filename returned from store().
   */
  class FileImportService
  {
      /** Stored uploads older than this many seconds are removed by cleanup(). */
      private const MAX_AGE_SECONDS = 3600;

      /** File extensions (lowercase, without dot) accepted by store(). */
      private const ALLOWED_EXTENSIONS = ['csv', 'xlsx'];

      /** UTF-8 byte order mark that spreadsheet tools prepend to CSV exports. */
      private const UTF8_BOM = "\xEF\xBB\xBF";

      /** Namespace URI of the r:id attribute on <sheet> (transitional OOXML). */
      private const RELATIONSHIPS_NS = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships';

      /** Absolute path of the upload store, always ending in a directory separator. */
      private string $tempDir;

      /**
       * Resolves the upload store location and creates it when missing.
       *
       * @throws RuntimeException when the directory does not exist and cannot be created
       */
      public function __construct()
      {
          $this->tempDir = rtrim(sys_get_temp_dir(), '/\\') . DIRECTORY_SEPARATOR . 'ct_imports' . DIRECTORY_SEPARATOR;

          // The trailing is_dir() covers the race where a concurrent request
          // creates the directory between our first check and mkdir().
          if (!is_dir($this->tempDir) && !@mkdir($this->tempDir, 0700, true) && !is_dir($this->tempDir)) {
              throw new RuntimeException('Could not create the import storage directory.');
          }
      }

      // ── Upload ────────────────────────────────────────────────────────────────

      /**
       * Move an uploaded file to the temp store and return its assigned filename.
       *
       * The stored name is random; only the (validated) extension of the
       * client-supplied name is kept. Old uploads are purged as a side effect.
       *
       * @param array{name: string, tmp_name: string, error: int} $upload $_FILES entry
       *
       * @throws RuntimeException on upload error, unsupported extension or failed move
       */
      public function store(array $upload): string
      {
          // Resolve the code once so the message never reads a missing array key.
          $error = $upload['error'] ?? UPLOAD_ERR_NO_FILE;
          if ($error !== UPLOAD_ERR_OK) {
              throw new RuntimeException('File upload failed (error code ' . $error . ').');
          }

          $ext = $this->extensionOf((string) ($upload['name'] ?? ''));
          if (!in_array($ext, self::ALLOWED_EXTENSIONS, true)) {
              throw new RuntimeException('Only CSV (.csv) and Excel (.xlsx) files are supported.');
          }

          $filename = bin2hex(random_bytes(16)) . '.' . $ext;
          if (!move_uploaded_file((string) ($upload['tmp_name'] ?? ''), $this->tempDir . $filename)) {
              throw new RuntimeException('Could not save the uploaded file.');
          }

          $this->cleanup();

          return $filename;
      }

      // ── Sheet list ────────────────────────────────────────────────────────────

      /**
       * Returns the sheets in the file in the same shape as GoogleSheetImportService.
       *
       * A CSV file always reports a single pseudo-sheet with gid "0".
       *
       * @return list<array{gid: string, title: string}>
       *
       * @throws RuntimeException when the file is missing or is not a readable xlsx
       */
      public function sheets(string $filename): array
      {
          $path = $this->guardedPath($filename);

          if ($this->extensionOf($filename) === 'csv') {
              return [['gid' => '0', 'title' => 'CSV data']];
          }

          return $this->xlsxSheets($path);
      }

      // ── Row data ──────────────────────────────────────────────────────────────

      /**
       * Returns rows for the given sheet.
       * For CSV the gid is ignored (only one sheet).
       * For xlsx the gid is the 0-based sheet index returned by sheets().
       *
       * @return array{headers: list<string>, rows: list<array<string, string>>}
       *
       * @throws RuntimeException when the file is missing, unreadable or malformed
       */
      public function rows(string $filename, string $gid): array
      {
          $path = $this->guardedPath($filename);

          if ($this->extensionOf($filename) === 'csv') {
              $contents = file_get_contents($path);
              if ($contents === false) {
                  // Do not let a read failure masquerade as "file is empty".
                  throw new RuntimeException('Unable to read the uploaded file.');
              }

              return $this->parseCsv($contents);
          }

          return $this->xlsxRows($path, (int) $gid);
      }

      /**
       * Removes a stored upload. Best effort: a missing file or a failed unlink
       * is silently ignored.
       */
      public function delete(string $filename): void
      {
          $path = $this->tempDir . basename($filename);

          // is_file() (not file_exists()) so that '' or '..' never targets a directory.
          if (is_file($path)) {
              @unlink($path);
          }
      }

      // ── xlsx parsing ──────────────────────────────────────────────────────────

      /**
       * Lists the titled sheets declared in xl/workbook.xml.
       *
       * The gid is the sheet's position among ALL <sheet> elements, which is the
       * same numbering xlsxSheetFile() uses to resolve it again.
       *
       * @return list<array{gid: string, title: string}>
       *
       * @throws RuntimeException when the archive or workbook.xml cannot be read
       */
      private function xlsxSheets(string $path): array
      {
          $zip = $this->openZip($path);
          try {
              $xml = $zip->getFromName('xl/workbook.xml');
              if ($xml === false) {
                  throw new RuntimeException('Invalid xlsx file — workbook.xml not found.');
              }

              $workbook = $this->loadXml($xml, 'workbook.xml');

              $sheets = [];
              $index = 0;
              foreach ($workbook->sheets->sheet ?? [] as $sheet) {
                  $title = trim((string) $sheet['name']);
                  if ($title !== '') {
                      $sheets[] = ['gid' => (string) $index, 'title' => $title];
                  }
                  // Advance for every sheet, titled or not, so the gid keeps
                  // matching the position-based lookup in xlsxSheetFile().
                  $index++;
              }

              return $sheets;
          } finally {
              $zip->close();
          }
      }

      /**
       * Reads one worksheet of an xlsx archive.
       *
       * @return array{headers: list<string>, rows: list<array<string, string>>}
       *
       * @throws RuntimeException when the archive, the sheet index or the sheet XML is invalid
       */
      private function xlsxRows(string $path, int $sheetIndex): array
      {
          $zip = $this->openZip($path);
          try {
              $sharedStrings = $this->readSharedStrings($zip);

              // Resolve sheet file from workbook rels
              $sheetFile = $this->xlsxSheetFile($zip, $sheetIndex);
              if ($sheetFile === null) {
                  throw new RuntimeException("Sheet index {$sheetIndex} not found in this file.");
              }

              $sheetXml = $zip->getFromName($sheetFile);
              if ($sheetXml === false) {
                  throw new RuntimeException("Cannot read sheet data.");
              }

              return $this->parseSheetXml($sheetXml, $sharedStrings);
          } finally {
              $zip->close();
          }
      }

      /**
       * Loads the shared strings table (xl/sharedStrings.xml), in file order.
       * Returns an empty list when the archive has no such part.
       *
       * @return list<string>
       *
       * @throws RuntimeException when the part exists but is not well-formed XML
       */
      private function readSharedStrings(\ZipArchive $zip): array
      {
          $xml = $zip->getFromName('xl/sharedStrings.xml');
          if ($xml === false) {
              return [];
          }

          $strings = [];
          foreach ($this->loadXml($xml, 'sharedStrings.xml')->si ?? [] as $item) {
              $strings[] = $this->richText($item);
          }

          return $strings;
      }

      /**
       * Maps a 0-based sheet position to the archive path of its worksheet part,
       * using the r:id → Target map from xl/_rels/workbook.xml.rels.
       *
       * @return string|null archive-relative path, or null when the workbook parts
       *                     are missing, the index is out of range or the
       *                     relationship is undeclared
       *
       * @throws RuntimeException when a workbook part is not well-formed XML
       */
      private function xlsxSheetFile(\ZipArchive $zip, int $sheetIndex): ?string
      {
          $wbXml = $zip->getFromName('xl/workbook.xml');
          $relXml = $zip->getFromName('xl/_rels/workbook.xml.rels');
          if ($wbXml === false || $relXml === false) {
              return null;
          }

          $workbook = $this->loadXml($wbXml, 'workbook.xml');
          $rels = $this->loadXml($relXml, 'workbook.xml.rels');

          // Build rId → target map
          $relMap = [];
          foreach ($rels->Relationship ?? [] as $relationship) {
              $relMap[(string) $relationship['Id']] = (string) $relationship['Target'];
          }

          $index = 0;
          foreach ($workbook->sheets->sheet ?? [] as $sheet) {
              if ($index++ !== $sheetIndex) {
                  continue;
              }

              // r:id lives in the relationships namespace. Try the conventional
              // "r" prefix first, then the namespace URI in case the producer
              // bound it to a different prefix.
              $rId = (string) ($sheet->attributes('r', true)['id'] ?? '');
              if ($rId === '') {
                  $rId = (string) ($sheet->attributes(self::RELATIONSHIPS_NS)['id'] ?? '');
              }

              if (!isset($relMap[$rId])) {
                  return null;
              }

              // Absolute targets are rooted at the archive; relative ones at xl/.
              $target = $relMap[$rId];

              return str_starts_with($target, '/') ? ltrim($target, '/') : 'xl/' . $target;
          }

          return null;
      }

      /**
       * Converts worksheet XML into a header list plus associative data rows.
       *
       * The first row present in the sheet supplies the headers. Columns with an
       * empty header are dropped from the data rows, and data rows whose kept
       * cells are all empty are skipped. All values are trimmed strings.
       *
       * @param list<string> $sharedStrings
       * @return array{headers: list<string>, rows: list<array<string, string>>}
       *
       * @throws RuntimeException when the XML is not well-formed
       */
      private function parseSheetXml(string $xml, array $sharedStrings): array
      {
          $sheet = $this->loadXml($xml, 'the worksheet');

          $rawRows = [];
          $maxCol = 0;
          $nextRow = 0;
          foreach ($sheet->sheetData->row ?? [] as $row) {
              // The "r" attribute is optional; without it a row simply follows
              // the previous one instead of collapsing onto index -1.
              $rowIdx = isset($row['r']) ? (int) $row['r'] - 1 : $nextRow;
              $nextRow = $rowIdx + 1;

              $cells = [];
              $nextCol = 0;
              foreach ($row->c as $cell) {
                  $colIdx = $this->colIndex((string) $cell['r']);
                  if ($colIdx < 0) {
                      // Same fallback for cells that carry no reference.
                      $colIdx = $nextCol;
                  }
                  $nextCol = $colIdx + 1;

                  $cells[$colIdx] = $this->cellValue($cell, $sharedStrings);
                  $maxCol = max($maxCol, $colIdx);
              }

              $rawRows[$rowIdx] = $cells;
          }

          if ($rawRows === []) {
              return ['headers' => [], 'rows' => []];
          }

          // Rows may appear out of order in the XML; process them by index.
          ksort($rawRows);
          $headerRow = array_key_first($rawRows);

          $headers = [];
          for ($c = 0; $c <= $maxCol; $c++) {
              $headers[] = trim((string) ($rawRows[$headerRow][$c] ?? ''));
          }

          $rows = [];
          foreach ($rawRows as $rowIdx => $cells) {
              if ($rowIdx === $headerRow) {
                  continue;
              }

              $record = [];
              $hasValue = false;
              foreach ($headers as $c => $header) {
                  if ($header === '') {
                      continue;
                  }
                  $value = trim((string) ($cells[$c] ?? ''));
                  $record[$header] = $value;
                  $hasValue = $hasValue || $value !== '';
              }

              if ($hasValue) {
                  $rows[] = $record;
              }
          }

          return ['headers' => $headers, 'rows' => $rows];
      }

      /**
       * Extracts the text of a single <c> cell element.
       *
       * - t="inlineStr": text is stored in <is>, NOT in <v>, so it is read
       *   before (and independently of) the <v> check.
       * - t="s": <v> is an index into the shared strings table.
       * - anything else: the raw <v> content.
       *
       * @param list<string> $sharedStrings
       */
      private function cellValue(\SimpleXMLElement $cell, array $sharedStrings): string
      {
          $type = (string) $cell['t'];

          if ($type === 'inlineStr' && isset($cell->is)) {
              return $this->richText($cell->is);
          }

          if (!isset($cell->v)) {
              return '';
          }

          $raw = (string) $cell->v;

          return $type === 's' ? ($sharedStrings[(int) $raw] ?? '') : $raw;
      }

      /**
       * Returns the text of a string item (<si> or <is>): either its direct <t>
       * child or, for rich text, the concatenation of every <r><t> run.
       */
      private function richText(\SimpleXMLElement $node): string
      {
          if (isset($node->t)) {
              return (string) $node->t;
          }

          $text = '';
          foreach ($node->r as $run) {
              $text .= (string) $run->t;
          }

          return $text;
      }

      /**
       * Converts spreadsheet column letters to a 0-based index
       * ("A" → 0, "Z" → 25, "AA" → 26). A full cell reference such as "AB12"
       * is accepted; only its leading letters are used.
       *
       * @return int the column index, or -1 when $col does not start with a letter
       */
      private function colIndex(string $col): int
      {
          if (preg_match('/^[A-Za-z]+/', $col, $match) !== 1) {
              return -1;
          }

          $index = 0;
          foreach (str_split(strtoupper($match[0])) as $letter) {
              // Bijective base-26: 'A' (ASCII 65) counts as 1.
              $index = $index * 26 + (ord($letter) - 64);
          }

          return $index - 1;
      }

      // ── CSV parsing ───────────────────────────────────────────────────────────

      /**
       * Parses CSV text. The first record supplies the headers; columns with an
       * empty header are dropped and records with no non-empty kept value are
       * skipped. All values are trimmed strings.
       *
       * @return array{headers: list<string>, rows: list<array<string, string>>}
       *
       * @throws RuntimeException when the text contains no record at all
       */
      private function parseCsv(string $csv): array
      {
          // Drop the BOM once, up front. Trimming its bytes off every header
          // would also eat legitimate bytes of multibyte characters, and a BOM
          // in front of a quoted first field hides the quote from fgetcsv().
          if (str_starts_with($csv, self::UTF8_BOM)) {
              $csv = substr($csv, strlen(self::UTF8_BOM));
          }

          $handle = fopen('php://temp', 'r+');
          if ($handle === false) {
              throw new RuntimeException('Unable to parse the CSV file.');
          }

          try {
              fwrite($handle, $csv);
              rewind($handle);

              $headers = $this->readCsvRecord($handle);
              if ($headers === false) {
                  throw new RuntimeException('The CSV file is empty.');
              }
              $headers = array_map(
                  static fn ($h): string => trim((string) $h),
                  $headers
              );

              $rows = [];
              while (($values = $this->readCsvRecord($handle)) !== false) {
                  $record = [];
                  $hasValue = false;
                  foreach ($headers as $i => $header) {
                      if ($header === '') {
                          continue;
                      }
                      $value = trim((string) ($values[$i] ?? ''));
                      $record[$header] = $value;
                      $hasValue = $hasValue || $value !== '';
                  }

                  if ($hasValue) {
                      $rows[] = $record;
                  }
              }

              return ['headers' => $headers, 'rows' => $rows];
          } finally {
              // Released on every path, including the exceptions above.
              fclose($handle);
          }
      }

      /**
       * Reads one CSV record with comma delimiter and double-quote enclosure.
       *
       * The escape character is explicitly disabled: spreadsheet exports escape
       * quotes by doubling them, and PHP's default backslash escape would
       * mis-parse a quoted field that ends in "\".
       *
       * @param resource $handle
       * @return array<int, string|null>|false false at end of input
       */
      private function readCsvRecord($handle): array|false
      {
          return fgetcsv($handle, null, ',', '"', '');
      }

      // ── Helpers ───────────────────────────────────────────────────────────────

      /**
       * Opens the file as a zip archive. The caller is responsible for close().
       *
       * @throws RuntimeException when the file is not a readable zip archive
       */
      private function openZip(string $path): \ZipArchive
      {
          $zip = new \ZipArchive();
          $result = $zip->open($path);
          if ($result !== true) {
              throw new RuntimeException('Cannot open the file. Make sure it is a valid .xlsx file (error ' . $result . ').');
          }

          return $zip;
      }

      /**
       * Parses an XML part of the archive, turning parse failures into an
       * exception instead of PHP warnings followed by property reads on false.
       *
       * @param string $label human-readable part name used in the error message
       *
       * @throws RuntimeException when $xml is not well-formed
       */
      private function loadXml(string $xml, string $label): \SimpleXMLElement
      {
          $previous = libxml_use_internal_errors(true);
          try {
              // LIBXML_NONET: never fetch anything over the network while parsing.
              $document = simplexml_load_string($xml, \SimpleXMLElement::class, LIBXML_NONET);
          } finally {
              libxml_clear_errors();
              libxml_use_internal_errors($previous);
          }

          if ($document === false) {
              throw new RuntimeException("Invalid xlsx file — {$label} is not well-formed XML.");
          }

          return $document;
      }

      /**
       * Returns the lowercase extension (without dot) of a file name.
       */
      private function extensionOf(string $filename): string
      {
          return strtolower(pathinfo($filename, PATHINFO_EXTENSION));
      }

      /**
       * Resolves a stored filename to its absolute path inside the upload store.
       *
       * @throws RuntimeException when no regular file of that name is stored
       */
      private function guardedPath(string $filename): string
      {
          // Prevent path traversal — only allow the basename.
          $safe = $this->tempDir . basename($filename);

          // is_file() rather than file_exists(): '' and '..' survive basename()
          // and would otherwise resolve to an existing directory.
          if (!is_file($safe)) {
              throw new RuntimeException('Uploaded file not found. Please upload the file again.');
          }

          return $safe;
      }

      /**
       * Deletes stored uploads older than MAX_AGE_SECONDS. Best effort: files
       * that cannot be inspected or removed are left in place.
       */
      private function cleanup(): void
      {
          $now = time();
          foreach (glob($this->tempDir . '*') ?: [] as $file) {
              if (!is_file($file)) {
                  continue;
              }

              $modified = filemtime($file);
              if ($modified !== false && $now - $modified > self::MAX_AGE_SECONDS) {
                  @unlink($file);
              }
          }
      }
  }

Powered by TurnKey Linux.