Skip to content

Commit

Permalink
Merge pull request #1446 from nextcloud/fix/1440/import-types
Browse files Browse the repository at this point in the history
fix and improve detection and import of ods, xlsx and csv documents
  • Loading branch information
blizzz authored Nov 27, 2024
2 parents 9ea4efb + 36179dc commit 3a06109
Show file tree
Hide file tree
Showing 15 changed files with 194 additions and 40 deletions.
4 changes: 2 additions & 2 deletions lib/Service/ColumnTypes/DatetimeDateBusiness.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class DatetimeDateBusiness extends SuperBusiness implements IColumnTypeBusiness
* @return string
*/
public function parseValue($value, ?Column $column = null): string {
return json_encode($this->isValidDate($value, 'Y-m-d') ? $value : '');
return json_encode($this->isValidDate((string)$value, 'Y-m-d') ? (string)$value : '');
}

/**
Expand All @@ -26,7 +26,7 @@ public function parseValue($value, ?Column $column = null): string {
* @return bool
*/
public function canBeParsed($value, ?Column $column = null): bool {
return $this->isValidDate($value, 'Y-m-d');
return $this->isValidDate((string)$value, 'Y-m-d');
}

}
4 changes: 2 additions & 2 deletions lib/Service/ColumnTypes/DatetimeTimeBusiness.php
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class DatetimeTimeBusiness extends SuperBusiness implements IColumnTypeBusiness
* @return string
*/
public function parseValue($value, ?Column $column = null): string {
return json_encode($this->isValidDate($value, 'H:i') ? $value : '');
return json_encode($this->isValidDate((string)$value, 'H:i') ? $value : '');
}

/**
Expand All @@ -26,7 +26,7 @@ public function parseValue($value, ?Column $column = null): string {
* @return bool
*/
public function canBeParsed($value, ?Column $column = null): bool {
return $this->isValidDate($value, 'H:i');
return $this->isValidDate((string)$value, 'H:i');
}

}
4 changes: 2 additions & 2 deletions lib/Service/ColumnTypes/SelectionCheckBusiness.php
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@
use OCA\Tables\Db\Column;

class SelectionCheckBusiness extends SuperBusiness implements IColumnTypeBusiness {
public const PATTERN_POSITIVE = ['yes', '1', true, 1, 'true'];
public const PATTERN_NEGATIVE = ['no', '0', false, 0, 'false'];
public const PATTERN_POSITIVE = ['yes', '1', true, 1, 'true', 'TRUE'];
public const PATTERN_NEGATIVE = ['no', '0', false, 0, 'false', 'FALSE'];

/**
* @param mixed $value
Expand Down
105 changes: 96 additions & 9 deletions lib/Service/ImportService.php
Original file line number Diff line number Diff line change
Expand Up @@ -136,7 +136,7 @@ private function getPreviewData(Worksheet $worksheet): array {
$columns[] = [
'title' => $title,
'type' => $this->rawColumnDataTypes[$colIndex]['type'],
'subtype' => $this->rawColumnDataTypes[$colIndex]['subtype'],
'subtype' => $this->rawColumnDataTypes[$colIndex]['subtype'] ?? null,
'numberDecimals' => $this->rawColumnDataTypes[$colIndex]['number_decimals'] ?? 0,
'numberPrefix' => $this->rawColumnDataTypes[$colIndex]['number_prefix'] ?? '',
'numberSuffix' => $this->rawColumnDataTypes[$colIndex]['number_suffix'] ?? '',
Expand All @@ -154,13 +154,26 @@ private function getPreviewData(Worksheet $worksheet): array {
$cellIterator = $row->getCellIterator();
$cellIterator->setIterateOnlyExistingCells(false);

foreach ($cellIterator as $cellIndex => $cell) {
foreach ($cellIterator as $cell) {
$value = $cell->getValue();
$colIndex = (int) $cellIndex;
// The cell iterator's column index is 1-based, not 0-based.
$colIndex = $cellIterator->getCurrentColumnIndex() - 1;
$column = $this->columns[$colIndex];

if (($column && $column->getType() === 'datetime') || (is_array($columns[$colIndex]) && $columns[$colIndex]['type'] === 'datetime')) {
$value = Date::excelToDateTimeObject($value)->format('Y-m-d H:i');
if (isset($columns[$colIndex]['subtype']) && $columns[$colIndex]['subtype'] === 'date') {
$format = 'Y-m-d';
} elseif (isset($columns[$colIndex]['subtype']) && $columns[$colIndex]['subtype'] === 'time') {
$format = 'H:i';
} else {
$format = 'Y-m-d H:i';
}

try {
$value = Date::excelToDateTimeObject($value)->format($format);
} catch (\TypeError) {
$value = (new \DateTimeImmutable($value))->format($format);
}
} elseif (($column && $column->getType() === 'number' && $column->getNumberSuffix() === '%')
|| (is_array($columns[$colIndex]) && $columns[$colIndex]['type'] === 'number' && $columns[$colIndex]['numberSuffix'] === '%')) {
$value = $value * 100;
Expand Down Expand Up @@ -285,8 +298,14 @@ public function import(?int $tableId, ?int $viewId, string $path, bool $createMi
* @throws PermissionError
*/
private function loop(Worksheet $worksheet): void {
$firstRow = $worksheet->getRowIterator()->current();
$secondRow = $worksheet->getRowIterator()->seek(2)->current();
$rowIterator = $worksheet->getRowIterator();
$firstRow = $rowIterator->current();
$rowIterator->next();
if (!$rowIterator->valid()) {
return;
}
$secondRow = $rowIterator->current();
unset($rowIterator);
$this->getColumns($firstRow, $secondRow);

if (empty(array_filter($this->columns))) {
Expand Down Expand Up @@ -361,8 +380,32 @@ private function createRow(Row $row): void {

$value = $cell->getValue();
$hasData = $hasData || !empty($value);

if ($column->getType() === 'datetime') {
$value = Date::excelToDateTimeObject($value)->format('Y-m-d H:i');
if ($column->getType() === 'datetime' && $column->getSubtype() === 'date') {
$format = 'Y-m-d';
} elseif ($column->getType() === 'datetime' && $column->getSubtype() === 'time') {
$format = 'H:i';
} else {
$format = 'Y-m-d H:i';
}
try {
$value = Date::excelToDateTimeObject($value)->format($format);
} catch (\TypeError) {
$value = (new \DateTimeImmutable($value))->format($format);
}
} elseif ($column->getType() === 'datetime' && $column->getSubtype() === 'date') {
try {
$value = Date::excelToDateTimeObject($value)->format('Y-m-d');
} catch (\TypeError) {
$value = (new \DateTimeImmutable($value))->format('Y-m-d');
}
} elseif ($column->getType() === 'datetime' && $column->getSubtype() === 'time') {
try {
$value = Date::excelToDateTimeObject($value)->format('H:i');
} catch (\TypeError) {
$value = (new \DateTimeImmutable($value))->format('H:i');
}
} elseif ($column->getType() === 'number' && $column->getNumberSuffix() === '%') {
$value = $value * 100;
} elseif ($column->getType() === 'selection' && $column->getSubtype() === 'check') {
Expand Down Expand Up @@ -414,6 +457,8 @@ private function getColumns(Row $firstRow, Row $secondRow): void {
$index = 0;
$countMatchingColumnsFromConfig = 0;
$countCreatedColumnsFromConfig = 0;
$lastCellWasEmpty = false;
$hasGapInTitles = false;
foreach ($cellIterator as $cell) {
if ($cell && $cell->getValue() !== null && $cell->getValue() !== '') {
$title = $cell->getValue();
Expand All @@ -437,14 +482,29 @@ private function getColumns(Row $firstRow, Row $secondRow): void {

// Convert data type to our data type
$dataTypes[] = $this->parseColumnDataType($secondRowCellIterator->current());
if ($lastCellWasEmpty) {
$hasGapInTitles = true;
}
$lastCellWasEmpty = false;
} else {
$this->logger->debug('No cell given or cellValue is empty while loading columns for importing');
if ($cell->getDataType() === 'null') {
// LibreOffice generated XLSX doc may have more empty columns in the first row.
// Continue without increasing error count, but leave a marker to detect gaps in titles.
$lastCellWasEmpty = true;
continue;
}
$this->countErrors++;
}
$secondRowCellIterator->next();
$index++;
}

if ($hasGapInTitles) {
$this->logger->info('Imported table is having a gap in column titles');
$this->countErrors++;
}

$this->rawColumnTitles = $titles;
$this->rawColumnDataTypes = $dataTypes;

Expand All @@ -468,9 +528,33 @@ private function parseColumnDataType(Cell $cell): array {
'subtype' => 'line',
];

if (Date::isDateTime($cell) || $originDataType === DataType::TYPE_ISO_DATE) {
try {
if ($value === false) {
throw new \Exception('We do not accept `false` here');
}
$dateValue = new \DateTimeImmutable($value);
} catch (\Exception) {
}

if (isset($dateValue)
|| Date::isDateTime($cell)
|| $originDataType === DataType::TYPE_ISO_DATE) {
// the formatted value stems from the office document and shows the original user intent
$dateAnalysis = date_parse($formattedValue);
$containsDate = $dateAnalysis['year'] !== false || $dateAnalysis['month'] !== false || $dateAnalysis['day'] !== false;
$containsTime = $dateAnalysis['hour'] !== false || $dateAnalysis['minute'] !== false || $dateAnalysis['second'] !== false;

if ($containsDate && !$containsTime) {
$subType = 'date';
} elseif (!$containsDate && $containsTime) {
$subType = 'time';
} else {
$subType = '';
}

$dataType = [
'type' => 'datetime',
'subtype' => $subType,
];
} elseif ($originDataType === DataType::TYPE_NUMERIC) {
if (str_contains($formattedValue, '%')) {
Expand Down Expand Up @@ -514,7 +598,10 @@ private function parseColumnDataType(Cell $cell): array {
'type' => 'number',
];
}
} elseif ($originDataType === DataType::TYPE_BOOL) {
} elseif ($originDataType === DataType::TYPE_BOOL
|| ($originDataType === DataType::TYPE_FORMULA
&& in_array($formattedValue, ['FALSE', 'TRUE'], true))
) {
$dataType = [
'type' => 'selection',
'subtype' => 'check',
Expand Down
2 changes: 1 addition & 1 deletion src/modules/modals/Import.vue
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
<p class="span">
{{ t('tables', 'Supported formats: xlsx, xls, csv, html, xml') }}
<br>
{{ t('tables', 'First row of the file must contain column headings.') }}
{{ t('tables', 'First row of the file must contain column headings without gaps.') }}
</p>
</div>
</RowFormWrapper>
Expand Down
51 changes: 28 additions & 23 deletions tests/integration/features/APIv1.feature
Original file line number Diff line number Diff line change
Expand Up @@ -187,31 +187,36 @@ Feature: APIv1
Then user deletes last created row
Then user "participant1" deletes table with keyword "Rows check"


@api1
Scenario: Import csv table
Given file "/import.csv" exists for user "participant1" with following data
| Col1 | Col2 | Col3 | num | emoji | special |
| Val1 | Val2 | Val3 | 1 | 💙 | Ä |
| great | news | here | 99 | ⚠️ | Ö |
Given table "Import test" with emoji "👨🏻‍💻" exists for user "participant1" as "base1"
When user imports file "/import.csv" into last created table
@api1 @import
Scenario Outline: Import a document file
Given user "participant1" uploads file "<importfile>"
And table "Import test" with emoji "👨🏻‍💻" exists for user "participant1" as "base1"
When user imports file "/<importfile>" into last created table
Then import results have the following data
| found_columns_count | 6 |
| created_columns_count | 6 |
| inserted_rows_count | 2 |
| errors_count | 0 |
Then table has at least following columns
| Col1 |
| Col2 |
| Col3 |
| num |
| emoji |
| special |
| found_columns_count | 10 |
| created_columns_count | 10 |
| inserted_rows_count | 2 |
| errors_count | 0 |
Then table has at least following typed columns
| Col1 | text |
| Col2 | text |
| Col3 | text |
| num | number |
| emoji | text |
| special | text |
| date | datetime |
| truth | selection |
Then table contains at least following rows
| Col1 | Col2 | Col3 | num | emoji | special |
| Val1 | Val2 | Val3 | 1 | 💙 | Ä |
| great | news | here | 99 | ⚠️ | Ö |
| Date and Time | Col1 | Col2 | Col3 | num | emoji | special | date | truth | time |
| 2022-02-20 08:42 | Val1 | Val2 | Val3 | 1 | 💙 | Ä | 2024-02-24 | false | 18:48 |
| 2016-06-01 13:37 | great | news | here | 99 | ⚠ | Ö | 2016-06-01 | true | 01:23 |

Examples:
| importfile |
| import-from-libreoffice.ods |
| import-from-libreoffice.xlsx |
| import-from-ms365.xlsx |
| import-from-libreoffice.csv |

@api1
Scenario: Create, edit and delete views
Expand Down
49 changes: 48 additions & 1 deletion tests/integration/features/bootstrap/FeatureContext.php
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
use GuzzleHttp\Cookie\CookieJar;
use GuzzleHttp\Exception\ClientException;
use GuzzleHttp\Exception\GuzzleException;
use GuzzleHttp\Psr7\Utils;
use PHPUnit\Framework\Assert;
use PHPUnit\Framework\ExpectationFailedException;
use Psr\Http\Message\ResponseInterface;
Expand Down Expand Up @@ -64,6 +65,8 @@ class FeatureContext implements Context {
private array $tableData = [];
private array $viewData = [];

private $importColumnData = null;

// use CommandLineTrait;
private CollectionManager $collectionManager;

Expand All @@ -89,6 +92,7 @@ public function setUp() {
* @AfterScenario
*/
public function cleanupUsers() {
$this->importColumnData = null;
$this->collectionManager->cleanUp();
foreach ($this->createdUsers as $user) {
$this->deleteUser($user);
Expand Down Expand Up @@ -467,8 +471,21 @@ public function columnsForNodeV2(string $nodeType, string $nodeName, ?TableNode
// (((((((((((((((((((((((((((( END API v2 )))))))))))))))))))))))))))))))))))


/**
 * Uploads a fixture file from the integration test resources directory
 * into the given user's files via a WebDAV PUT request.
 *
 * @Given user :user uploads file :file
 *
 * @param string $user user that becomes the current user and whose DAV files endpoint receives the upload
 * @param string $file file name, resolved against the local resources directory and used as the remote path
 */
public function uploadFile(string $user, string $file): void {
	$this->setCurrentUser($user);

	$localFilePath = __DIR__ . '/../../resources/' . $file;

	// Fail right here with a clear message when the fixture is missing.
	// Utils::streamFor() accepts scalars, so the `false` returned by a failed
	// fopen() would otherwise be sent as an empty body and only surface later
	// as a confusing import error.
	Assert::assertTrue(
		is_readable($localFilePath),
		sprintf('Fixture file "%s" does not exist or is not readable', $localFilePath)
	);

	$handle = fopen($localFilePath, 'rb');
	Assert::assertNotFalse(
		$handle,
		sprintf('Fixture file "%s" could not be opened for reading', $localFilePath)
	);

	$url = sprintf('%sremote.php/dav/files/%s/%s', $this->baseUrl, $user, $file);
	$body = Utils::streamFor($handle);

	$this->sendRequestFullUrl('PUT', $url, $body);

	// The upload is expected to be answered with 201.
	Assert::assertEquals(201, $this->response->getStatusCode());
}

// IMPORT --------------------------

Expand Down Expand Up @@ -574,7 +591,7 @@ public function checkRowsExists(TableNode $table): void {
$allValuesForColumn[] = $row[$indexForCol];
}
foreach ($table->getColumn($key) as $item) {
Assert::assertTrue(in_array($item, $allValuesForColumn));
Assert::assertTrue(in_array($item, $allValuesForColumn), sprintf('%s not in %s', $item, implode(', ', $allValuesForColumn)));
}
}
}
Expand Down Expand Up @@ -1190,6 +1207,36 @@ public function tableColumns(?TableNode $body = null): void {
}
}

/**
 * Fetches the columns of the current table and verifies that every listed
 * column title exists and carries the expected type.
 *
 * @Then table has at least following typed columns
 *
 * @param TableNode|null $body rows of column title and expected type; null asserts that the table has no columns
 */
public function tableTypedColumns(?TableNode $body = null): void {
	$endpoint = '/apps/tables/api/1/tables/'.$this->tableId.'/columns';
	$this->sendRequest('GET', $endpoint);

	$columns = $this->getDataFromResponse($this->response);
	Assert::assertEquals(200, $this->response->getStatusCode());

	// Without an expectation table, the table must not have any columns.
	if ($body === null) {
		Assert::assertCount(0, $columns);
		return;
	}

	// Index the returned column types by their title for direct lookup.
	$typeByTitle = [];
	foreach ($columns as $columnInfo) {
		$typeByTitle[$columnInfo['title']] = $columnInfo['type'];
	}

	// Each expectation row holds the column title first and its type second.
	foreach ($body->getRows() as $expectation) {
		$title = $expectation[0];
		Assert::assertArrayHasKey($title, $typeByTitle);

		$actualType = $typeByTitle[$title];
		Assert::assertSame($expectation[1], $actualType, sprintf('Column "%s" has unexpected type "%s"', $title, $actualType));
	}
}

/**
* @Then user deletes last created column
*/
Expand Down
3 changes: 3 additions & 0 deletions tests/integration/resources/import-from-libreoffice.csv
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
Date and Time,Col1,Col2,Col3,num,emoji,special,date,truth,time
2022-02-20 08:42,Val1,Val2,Val3,1,💙,Ä,2024-02-24,false,18:48
2016-06-01 13:37,great,news,here,99,⚠,Ö,2016-06-01,true,01:23
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
SPDX-License-Identifier: AGPL-3.0-or-later

Binary file not shown.
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
SPDX-FileCopyrightText: 2024 Nextcloud GmbH and Nextcloud contributors
SPDX-License-Identifier: AGPL-3.0-or-later

Binary file not shown.
Loading

0 comments on commit 3a06109

Please sign in to comment.