|
- <?php
-
- declare(strict_types=1);
-
- namespace App\Services;
-
- use RuntimeException;
-
/**
 * Reads job row data from an uploaded CSV or Excel (.xlsx) file.
 * No external libraries — CSV uses fgetcsv, xlsx uses ZipArchive + SimpleXML.
 *
 * Uploaded files live under "<system temp dir>/ct_imports/" with a random name.
 * Every call to store() also removes stored files older than one hour.
 */
class FileImportService
{
    /** File extensions accepted by store(). */
    private const ALLOWED_EXTENSIONS = ['csv', 'xlsx'];

    /** Age in seconds after which cleanup() removes a stored file. */
    private const MAX_AGE_SECONDS = 3600;

    /** UTF-8 byte order mark that spreadsheet tools commonly prepend to CSV exports. */
    private const UTF8_BOM = "\xEF\xBB\xBF";

    /** Namespaces that may carry the r:id attribute (transitional and strict OOXML). */
    private const RELATIONSHIP_NAMESPACES = [
        'http://schemas.openxmlformats.org/officeDocument/2006/relationships',
        'http://purl.oclc.org/ooxml/officeDocument/relationships',
    ];

    /** Absolute path of the import directory, always ending in a directory separator. */
    private string $tempDir;

    /**
     * Resolves the import directory and creates it when missing.
     *
     * @throws RuntimeException when the directory cannot be created
     */
    public function __construct()
    {
        $this->tempDir = rtrim(sys_get_temp_dir(), '/\\') . DIRECTORY_SEPARATOR . 'ct_imports' . DIRECTORY_SEPARATOR;

        // The trailing is_dir() covers a concurrent request creating the directory first.
        if (!is_dir($this->tempDir) && !mkdir($this->tempDir, 0700, true) && !is_dir($this->tempDir)) {
            throw new RuntimeException('Could not create the import directory.');
        }
    }

    // ── Upload ────────────────────────────────────────────────────────────────

    /**
     * Move an uploaded file to the temp store and return its assigned filename.
     *
     * The stored name is random; only the (lower-cased) extension of the client
     * supplied name is kept. Stale files are purged after a successful move.
     *
     * @param array{name: string, tmp_name: string, error: int} $upload $_FILES entry
     *
     * @throws RuntimeException on upload error, unsupported extension or failed move
     */
    public function store(array $upload): string
    {
        $error = $upload['error'] ?? UPLOAD_ERR_NO_FILE;
        if ($error !== UPLOAD_ERR_OK) {
            throw new RuntimeException('File upload failed (error code ' . $error . ').');
        }

        $ext = $this->extension((string) ($upload['name'] ?? ''));
        if (!in_array($ext, self::ALLOWED_EXTENSIONS, true)) {
            throw new RuntimeException('Only CSV (.csv) and Excel (.xlsx) files are supported.');
        }

        $filename = bin2hex(random_bytes(16)) . '.' . $ext;

        if (!move_uploaded_file((string) ($upload['tmp_name'] ?? ''), $this->tempDir . $filename)) {
            throw new RuntimeException('Could not save the uploaded file.');
        }

        $this->cleanup();

        return $filename;
    }

    // ── Sheet list ────────────────────────────────────────────────────────────

    /**
     * Returns the sheets in the file in the same shape as GoogleSheetImportService.
     *
     * A CSV file is reported as a single sheet with gid "0".
     *
     * @return list<array{gid: string, title: string}>
     *
     * @throws RuntimeException when the file is missing or is not a readable xlsx
     */
    public function sheets(string $filename): array
    {
        $path = $this->guardedPath($filename);

        if ($this->extension($filename) === 'csv') {
            return [['gid' => '0', 'title' => 'CSV data']];
        }

        return $this->xlsxSheets($path);
    }

    // ── Row data ──────────────────────────────────────────────────────────────

    /**
     * Returns rows for the given sheet.
     * For CSV the gid is ignored (only one sheet).
     * For xlsx the gid is the 0-based sheet index returned by sheets().
     *
     * @return array{headers: list<string>, rows: list<array<string, string>>}
     *
     * @throws RuntimeException when the file is missing, unreadable or the sheet does not exist
     */
    public function rows(string $filename, string $gid): array
    {
        $path = $this->guardedPath($filename);

        if ($this->extension($filename) === 'csv') {
            return $this->parseCsv($path);
        }

        return $this->xlsxRows($path, (int) $gid);
    }

    /**
     * Removes a stored file. Missing files are ignored; removal is best effort.
     */
    public function delete(string $filename): void
    {
        $path = $this->tempDir . basename($filename);

        // is_file() rather than file_exists(): basename('') would resolve to the directory itself.
        if (is_file($path)) {
            @unlink($path);
        }
    }

    // ── xlsx parsing ──────────────────────────────────────────────────────────

    /**
     * Lists the titled sheets of a workbook.
     *
     * The gid is the sheet's position inside xl/workbook.xml, which is the same
     * counter xlsxSheetFile() uses. Untitled sheets are hidden from the list but
     * still consume an index, so the gids handed out here always resolve to the
     * sheet they were issued for.
     *
     * @return list<array{gid: string, title: string}>
     */
    private function xlsxSheets(string $path): array
    {
        $zip = $this->openZip($path);

        try {
            $xml = $zip->getFromName('xl/workbook.xml');
            if ($xml === false) {
                throw new RuntimeException('Invalid xlsx file — workbook.xml not found.');
            }

            $workbook = $this->loadXml($xml, 'workbook.xml');
            $sheets = [];

            if (!isset($workbook->sheets->sheet)) {
                return $sheets;
            }

            $index = 0;
            foreach ($workbook->sheets->sheet as $sheet) {
                $title = trim((string) $sheet['name']);
                if ($title !== '') {
                    $sheets[] = ['gid' => (string) $index, 'title' => $title];
                }
                $index++;
            }

            return $sheets;
        } finally {
            $zip->close();
        }
    }

    /**
     * Reads one worksheet of a workbook.
     *
     * @return array{headers: list<string>, rows: list<array<string, string>>}
     */
    private function xlsxRows(string $path, int $sheetIndex): array
    {
        $zip = $this->openZip($path);

        try {
            // Shared strings table (optional part: absent when the workbook has no shared text).
            $sharedStrings = [];
            $ssXml = $zip->getFromName('xl/sharedStrings.xml');
            if ($ssXml !== false) {
                foreach ($this->loadXml($ssXml, 'sharedStrings.xml')->si as $item) {
                    $sharedStrings[] = $this->stringItemText($item);
                }
            }

            // Resolve sheet file from workbook rels
            $sheetFile = $this->xlsxSheetFile($zip, $sheetIndex);
            if ($sheetFile === null) {
                throw new RuntimeException("Sheet index {$sheetIndex} not found in this file.");
            }

            $sheetXml = $zip->getFromName($sheetFile);
            if ($sheetXml === false) {
                throw new RuntimeException("Cannot read sheet data.");
            }

            return $this->parseSheetXml($sheetXml, $sharedStrings);
        } finally {
            $zip->close();
        }
    }

    /**
     * Maps a 0-based sheet position to the archive path of its worksheet part.
     *
     * @return string|null archive path, or null when the sheet or its relationship is missing
     */
    private function xlsxSheetFile(\ZipArchive $zip, int $sheetIndex): ?string
    {
        $wbXml = $zip->getFromName('xl/workbook.xml');
        $relXml = $zip->getFromName('xl/_rels/workbook.xml.rels');

        if ($wbXml === false || $relXml === false) {
            return null;
        }

        $workbook = $this->loadXml($wbXml, 'workbook.xml');
        $relations = $this->loadXml($relXml, 'workbook.xml.rels');

        if (!isset($workbook->sheets->sheet)) {
            return null;
        }

        // Build rId → target map
        $targets = [];
        foreach ($relations->Relationship as $relation) {
            $targets[(string) $relation['Id']] = (string) $relation['Target'];
        }

        $index = 0;
        foreach ($workbook->sheets->sheet as $sheet) {
            if ($index === $sheetIndex) {
                $target = $targets[$this->relationshipId($sheet)] ?? null;
                if ($target === null) {
                    return null;
                }

                // Absolute targets are relative to the archive root, others to the xl/ folder.
                return str_starts_with($target, '/') ? ltrim($target, '/') : 'xl/' . $target;
            }
            $index++;
        }

        return null;
    }

    /**
     * Reads the relationship id ("r:id") of a <sheet> element.
     *
     * The conventional "r" prefix is tried first; the namespace URIs are the
     * fallback for documents that bind the relationships namespace to another prefix.
     */
    private function relationshipId(\SimpleXMLElement $sheet): string
    {
        $attributes = $sheet->attributes('r', true);
        if ($attributes !== null && isset($attributes['id'])) {
            return (string) $attributes['id'];
        }

        foreach (self::RELATIONSHIP_NAMESPACES as $namespace) {
            $attributes = $sheet->attributes($namespace);
            if ($attributes !== null && isset($attributes['id'])) {
                return (string) $attributes['id'];
            }
        }

        return '';
    }

    /**
     * Converts worksheet XML into a header list plus associative rows.
     *
     * The first row present in the sheet supplies the headers. Columns with an
     * empty header and rows without any non-empty value are dropped.
     *
     * @param list<string> $sharedStrings
     * @return array{headers: list<string>, rows: list<array<string, string>>}
     */
    private function parseSheetXml(string $xml, array $sharedStrings): array
    {
        $sheet = $this->loadXml($xml, 'worksheet');
        $rawRows = [];
        $maxCol = 0;

        if (isset($sheet->sheetData->row)) {
            $nextRow = 0;

            foreach ($sheet->sheetData->row as $row) {
                // The "r" attributes are optional; without them position is implied by document order.
                $rowNumber = (int) $row['r'];
                $rowIdx = $rowNumber > 0 ? $rowNumber - 1 : $nextRow;
                $nextRow = $rowIdx + 1;

                $cells = [];
                $nextCol = 0;

                foreach ($row->c as $cell) {
                    $hasLetters = preg_match('/^[A-Za-z]+/', (string) $cell['r'], $match) === 1;
                    $colIdx = $hasLetters ? $this->colIndex($match[0]) : $nextCol;
                    $nextCol = $colIdx + 1;

                    $cells[$colIdx] = $this->cellValue($cell, $sharedStrings);
                    $maxCol = max($maxCol, $colIdx);
                }

                $rawRows[$rowIdx] = $cells;
            }
        }

        if ($rawRows === []) {
            return ['headers' => [], 'rows' => []];
        }

        $rowIndexes = array_keys($rawRows);
        sort($rowIndexes);
        $headerRow = $rowIndexes[0];

        $headers = [];
        for ($c = 0; $c <= $maxCol; $c++) {
            $headers[] = trim((string) ($rawRows[$headerRow][$c] ?? ''));
        }

        $rows = [];
        foreach ($rowIndexes as $rowIdx) {
            if ($rowIdx === $headerRow) {
                continue;
            }

            $record = [];
            $hasValue = false;

            foreach ($headers as $c => $header) {
                if ($header === '') {
                    continue;
                }
                $value = trim((string) ($rawRows[$rowIdx][$c] ?? ''));
                $record[$header] = $value;
                $hasValue = $hasValue || $value !== '';
            }

            if ($hasValue) {
                $rows[] = $record;
            }
        }

        return ['headers' => $headers, 'rows' => $rows];
    }

    /**
     * Extracts the text of a single <c> cell.
     *
     * Inline strings keep their text in <is> and have no <v> element, so they are
     * handled before the <v> check. Shared strings ("s") store an index into
     * $sharedStrings; every other type is returned as the raw <v> content.
     *
     * @param list<string> $sharedStrings
     */
    private function cellValue(\SimpleXMLElement $cell, array $sharedStrings): string
    {
        $type = (string) $cell['t'];

        if ($type === 'inlineStr') {
            return isset($cell->is) ? $this->stringItemText($cell->is) : '';
        }

        if (!isset($cell->v)) {
            return '';
        }

        $raw = (string) $cell->v;

        return $type === 's' ? ($sharedStrings[(int) $raw] ?? '') : $raw;
    }

    /**
     * Returns the text of a string item (<si> or <is>): either its plain <t>
     * child or the concatenation of the <t> of each rich-text run <r>.
     */
    private function stringItemText(\SimpleXMLElement $item): string
    {
        if (isset($item->t)) {
            return (string) $item->t;
        }

        $text = '';
        foreach ($item->r as $run) {
            $text .= (string) $run->t;
        }

        return $text;
    }

    /**
     * Converts column letters to a 0-based index ("A" → 0, "Z" → 25, "AA" → 26).
     */
    private function colIndex(string $col): int
    {
        $col = strtoupper($col);
        $index = 0;

        for ($i = 0, $len = strlen($col); $i < $len; $i++) {
            // Bijective base 26: 'A' (ASCII 65) counts as 1.
            $index = $index * 26 + (ord($col[$i]) - 64);
        }

        return $index - 1;
    }

    // ── CSV parsing ───────────────────────────────────────────────────────────

    /**
     * Streams a CSV file into a header list plus associative rows.
     *
     * The first record supplies the headers. Columns with an empty header and
     * rows without any non-empty value are dropped.
     *
     * @param string $path absolute path of the stored CSV file
     * @return array{headers: list<string>, rows: list<array<string, string>>}
     *
     * @throws RuntimeException when the file cannot be opened or has no records
     */
    private function parseCsv(string $path): array
    {
        $handle = fopen($path, 'rb');
        if ($handle === false) {
            throw new RuntimeException('Unable to parse the CSV file.');
        }

        try {
            // Skip a leading BOM as a whole. Trimming its bytes off each header would also
            // eat bytes of legitimate UTF-8 characters, and a BOM in front of a quoted
            // first field would stop fgetcsv from recognising the quotes.
            if (fread($handle, strlen(self::UTF8_BOM)) !== self::UTF8_BOM) {
                rewind($handle);
            }

            $headers = $this->readCsvRecord($handle);
            if ($headers === false) {
                throw new RuntimeException('The CSV file is empty.');
            }

            $headers = array_map(
                static fn ($header): string => trim((string) $header),
                $headers
            );

            $rows = [];
            while (($values = $this->readCsvRecord($handle)) !== false) {
                $record = [];
                $hasValue = false;

                foreach ($headers as $i => $header) {
                    if ($header === '') {
                        continue;
                    }
                    $value = trim((string) ($values[$i] ?? ''));
                    $record[$header] = $value;
                    $hasValue = $hasValue || $value !== '';
                }

                if ($hasValue) {
                    $rows[] = $record;
                }
            }

            return ['headers' => $headers, 'rows' => $rows];
        } finally {
            fclose($handle);
        }
    }

    /**
     * Reads one CSV record.
     *
     * Delimiter, enclosure and escape are spelled out (same values as the historic
     * defaults) because relying on the default escape is deprecated since PHP 8.4.
     *
     * @param resource $handle
     * @return list<string|null>|false
     */
    private function readCsvRecord($handle)
    {
        return fgetcsv($handle, null, ',', '"', '\\');
    }

    // ── Helpers ───────────────────────────────────────────────────────────────

    /**
     * Returns the lower-cased extension of a filename ('' when there is none).
     */
    private function extension(string $filename): string
    {
        return strtolower(pathinfo($filename, PATHINFO_EXTENSION));
    }

    /**
     * Parses one XML part of the archive.
     *
     * libxml warnings are collected internally and discarded so that a malformed
     * part surfaces as an exception instead of PHP warnings; the previous libxml
     * error mode is restored afterwards. LIBXML_NONET forbids network access
     * while loading.
     *
     * @param string $part name of the part, used in the error message only
     *
     * @throws RuntimeException when the XML is malformed
     */
    private function loadXml(string $xml, string $part): \SimpleXMLElement
    {
        $previous = libxml_use_internal_errors(true);

        try {
            $document = simplexml_load_string($xml, \SimpleXMLElement::class, LIBXML_NONET);
        } finally {
            libxml_clear_errors();
            libxml_use_internal_errors($previous);
        }

        if ($document === false) {
            throw new RuntimeException("Invalid xlsx file — {$part} could not be parsed.");
        }

        return $document;
    }

    /**
     * Opens the file as a zip archive. The caller is responsible for closing it.
     *
     * @throws RuntimeException when the file is not a readable zip archive
     */
    private function openZip(string $path): \ZipArchive
    {
        $zip = new \ZipArchive();
        $result = $zip->open($path);

        if ($result !== true) {
            throw new RuntimeException('Cannot open the file. Make sure it is a valid .xlsx file (error ' . $result . ').');
        }

        return $zip;
    }

    /**
     * Resolves a stored filename to its absolute path inside the import directory.
     *
     * @throws RuntimeException when no such file exists
     */
    private function guardedPath(string $filename): string
    {
        // Prevent path traversal — only allow the basename.
        $safe = $this->tempDir . basename($filename);

        // is_file() also rejects '' and '..', which would otherwise resolve to a directory.
        if (!is_file($safe)) {
            throw new RuntimeException('Uploaded file not found. Please upload the file again.');
        }

        return $safe;
    }

    /**
     * Deletes stored files whose modification time is older than MAX_AGE_SECONDS.
     * Best effort: files that vanish or cannot be removed are skipped silently.
     */
    private function cleanup(): void
    {
        $now = time();

        foreach (glob($this->tempDir . '*') ?: [] as $file) {
            if (!is_file($file)) {
                continue;
            }

            $modifiedAt = filemtime($file);
            if ($modifiedAt !== false && $now - $modifiedAt > self::MAX_AGE_SECONDS) {
                @unlink($file);
            }
        }
    }
}
|