From a802202aa3fbc03707da897547f634cf3026a742 Mon Sep 17 00:00:00 2001
From: Peter Lock
Date: Tue, 29 Oct 2024 15:03:16 +1000
Subject: [PATCH] issue 67: Resume search after a failure.

---
Reviewer note: indentation and blank lines in this copy were reconstructed from a
whitespace-collapsed source, and the "index" blob ids still describe the original
commit; regenerate the patch from a checkout before relying on either.

 classes/file_search.php | 76 ++++++++++++++++++++++++++++++++++++-----
 classes/helper.php      | 32 +++++++++++++++--
 tests/helper_test.php   | 42 +++++++++++++++++++++++
 3 files changed, 139 insertions(+), 11 deletions(-)

diff --git a/classes/file_search.php b/classes/file_search.php
index aaa2e91..ba48105 100644
--- a/classes/file_search.php
+++ b/classes/file_search.php
@@ -168,20 +168,38 @@ public static function files(files $record, string $output = '', int $limitfrom
             $output = $dir . '/' . $record->get_temp_filename();
         }
 
-        $stream = fopen($output, 'w');
-        $columnheaders = [
-            'fileid', 'courseid', 'shortname', 'contextid', 'component', 'filearea', 'itemid', 'filepath', 'filename',
-            'mimetype', 'strategy', 'internal', 'replace', 'offset', 'match',
-        ];
-        fputcsv($stream, $columnheaders);
-
         [$whereclause, $params] = self::make_where_clause($criteria);
+
+        // If the output file already exists, try to resume.
+        if (file_exists($output)) {
+            // This must be a resumed job. We need to append to previous output.
+            [$lastline, $linecount] = helper::read_last_line($output);
+            $resumemark = self::make_resume_mark($lastline);
+            $matchcount = ($linecount > 0) ? $linecount - 1 : 0;
+        } else {
+            $resumemark = '';
+            $matchcount = 0;
+        }
+        if ($resumemark != '') {
+            $stream = fopen($output, 'a');
+            $whereclause .=
+                " AND f.component || '-' || f.filearea || '-' || f.contextid || '-' || f.itemid || '-' || f.id >= :resumemark ";
+            $params['resumemark'] = $resumemark;
+        } else {
+            $stream = fopen($output, 'w');
+            $columnheaders = [
+                'fileid', 'courseid', 'shortname', 'contextid', 'component', 'filearea', 'itemid', 'filepath', 'filename',
+                'mimetype', 'strategy', 'internal', 'replace', 'offset', 'match',
+            ];
+            fputcsv($stream, $columnheaders);
+        }
+
         $record->set('timestart', time());
         $updatetime = time();
         $updatepercent = 0;
-        $matchcount = 0;
         $filecount = 0;
-        $totalfiles = $DB->count_records_select('files', $whereclause, $params);
+        $total = $DB->get_record_sql("SELECT COUNT('x') total FROM {files} f WHERE " . $whereclause, $params);
+        $totalfiles = $total->total;
         if (!empty($limitnum)) {
             $totalfiles = min($totalfiles, $limitnum);
         }
@@ -568,6 +586,46 @@ public static function make_where_clause(object $criteria): array {
 
         return [$whereclause, $params];
     }
+
+    /**
+     * Build the mark used to resume a search after a failure.
+     *
+     * The mark is "component-filearea-contextid-itemid-fileid", built from the last row
+     * written to the previous output file.
+     *
+     * @param string $lastline The last line of the previous output file.
+     * @return string The resume mark, or '' if a required column is missing or empty,
+     *                or if the line is the header line.
+     */
+    public static function make_resume_mark(string $lastline): string {
+        // Interpret the last line as a csv line.
+        $csv = str_getcsv($lastline);
+
+        $columns = [
+            self::CSV_COMPONENT,
+            self::CSV_FILEAREA,
+            self::CSV_CONTEXTID,
+            self::CSV_ITEMID,
+            self::CSV_FILEID,
+        ];
+        $parts = [];
+        foreach ($columns as $column) {
+            // Do not use empty() here: it also rejects the string '0', so rows with an itemid of 0 could never be resumed.
+            if (!isset($csv[$column]) || $csv[$column] === '') {
+                return '';
+            }
+            $parts[] = $csv[$column];
+        }
+
+        if ($csv[self::CSV_COMPONENT] === 'component') {
+            // It looks like we got the header line.
+            // That is, there was no data in the csv file.
+            return '';
+        }
+
+        return implode('-', $parts);
+    }
+
 
 }
 
diff --git a/classes/helper.php b/classes/helper.php
index 5b68b90..7e44856 100644
--- a/classes/helper.php
+++ b/classes/helper.php
@@ -657,7 +657,35 @@ private static function replace_all_text($table, database_column_info $column, s
             default:
                 throw new moodle_exception(get_string('errorcolumntypenotsupported', 'tool_advancedreplace'));
         }
-        $DB->execute($sql, $params);
+        $DB->execute($sql, $params);
     }
 
-}
+    /**
+     * Read the last line of a file and count its lines.
+     *
+     * @param string $filename Name of file to be read.
+     * @return array [string $lastline, int $linecount]: the last line, including its line ending if any,
+     *               and the number of lines. ['', 0] if the file is missing, cannot be opened or is empty.
+     */
+    public static function read_last_line(string $filename): array {
+        $lastline = '';
+        $linecount = 0;
+        if (!file_exists($filename)) {
+            return [$lastline, $linecount];
+        }
+
+        $file = fopen($filename, 'r');
+        if ($file === false) {
+            return [$lastline, $linecount];
+        }
+
+        // Compare strictly with false: a final line of "0" is falsy but is still a line.
+        while (($buffer = fgets($file)) !== false) {
+            $linecount++;
+            $lastline = $buffer;
+        }
+        fclose($file);
+
+        return [$lastline, $linecount];
+    }
+}
diff --git a/tests/helper_test.php b/tests/helper_test.php
index 9b7fb08..b90b8fc 100644
--- a/tests/helper_test.php
+++ b/tests/helper_test.php
@@ -490,5 +490,47 @@ public function test_find_link_function(): void {
 
         $this->find_module('assign', $assign->id);
     }
+    /**
+     * Test reading last line of csv file.
+     *
+     * @covers \tool_advancedreplace\helper::read_last_line
+     */
+    public function test_read_last_line(): void {
+        $this->resetAfterTest();
+
+        $tempdir = make_request_directory();
+        $tempfile = $tempdir . '/testfile.txt';
+
+        file_put_contents($tempfile, "Line 1\n");
+        [$lastline, $linecount] = helper::read_last_line($tempfile);
+        $this->assertEquals("Line 1\n", $lastline);
+        $this->assertEquals(1, $linecount);
+        file_put_contents($tempfile, "Line 1\nLine 2\n");
+        [$lastline, $linecount] = helper::read_last_line($tempfile);
+        $this->assertEquals("Line 2\n", $lastline);
+        $this->assertEquals(2, $linecount);
+
+        file_put_contents($tempfile, "Line 1\nLine 2");
+        [$lastline, $linecount] = helper::read_last_line($tempfile);
+        $this->assertEquals("Line 2", $lastline);
+        $this->assertEquals(2, $linecount);
+
+        // A final line of "0" without a line ending is falsy but must still be read.
+        file_put_contents($tempfile, "Line 1\n0");
+        [$lastline, $linecount] = helper::read_last_line($tempfile);
+        $this->assertSame("0", $lastline);
+        $this->assertEquals(2, $linecount);
+
+        file_put_contents($tempfile, "");
+        [$lastline, $linecount] = helper::read_last_line($tempfile);
+        $this->assertEquals("", $lastline);
+        $this->assertEquals(0, $linecount);
+
+        // No file to read.
+        [$lastline, $linecount] = helper::read_last_line($tempfile . 'garbage');
+        $this->assertEquals('', $lastline);
+        $this->assertEquals(0, $linecount);
+
+    }
 
 }