From 4438d728445abcd098c3cb8daf80512507170243 Mon Sep 17 00:00:00 2001 From: Francesco Bigiarini Date: Sat, 23 Nov 2024 18:18:37 +0100 Subject: [PATCH] [Data Liberation] Add WXR import CLI script (#2012) --- .../bin/import/blueprint-import-wxr.json | 18 +++++++ .../data-liberation/bin/import/import-wxr.sh | 48 +++++++++++++++++++ .../playground/data-liberation/bootstrap.php | 1 + .../playground/data-liberation/phpunit.xml | 1 + .../playground/data-liberation/plugin.php | 46 ++++++------------ .../playground/data-liberation/project.json | 10 ++++ .../data-liberation/src/functions.php | 34 +++++++++++++ .../src/import/WP_Attachment_Downloader.php | 19 ++++---- .../src/import/WP_Stream_Importer.php | 11 ++++- .../tests/WPStreamImporterTests.php | 23 +++++++++ .../tests/import/blueprint-import.json | 9 ++-- .../data-liberation/tests/import/run.sh | 2 +- 12 files changed, 175 insertions(+), 47 deletions(-) create mode 100644 packages/playground/data-liberation/bin/import/blueprint-import-wxr.json create mode 100644 packages/playground/data-liberation/bin/import/import-wxr.sh create mode 100644 packages/playground/data-liberation/tests/WPStreamImporterTests.php diff --git a/packages/playground/data-liberation/bin/import/blueprint-import-wxr.json b/packages/playground/data-liberation/bin/import/blueprint-import-wxr.json new file mode 100644 index 0000000000..55ab107921 --- /dev/null +++ b/packages/playground/data-liberation/bin/import/blueprint-import-wxr.json @@ -0,0 +1,18 @@ +{ + "$schema": "../../../blueprints/public/blueprint-schema.json", + "constants": { + "WP_DEBUG": true, + "WP_DEBUG_LOG": true + }, + "login": true, + "steps": [ + { + "step": "activatePlugin", + "pluginPath": "data-liberation/plugin.php" + }, + { + "step": "runPHP", + "code": "files as $file ) {\nif ( $file->isFile() && pathinfo( $file->getPathname(), PATHINFO_EXTENSION ) === 'xml' ) {\ndata_liberation_import( $file->getPathname() );\n}\n}\n};" + } + ] +} diff --git 
a/packages/playground/data-liberation/bin/import/import-wxr.sh b/packages/playground/data-liberation/bin/import/import-wxr.sh new file mode 100644 index 0000000000..49cedceebd --- /dev/null +++ b/packages/playground/data-liberation/bin/import/import-wxr.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# +# A script that accepts a folder and imports all WXR files into a WordPress site +# +# Usage: +# ./import-wxr.sh FOLDER +# + +# Display help message +show_help() { + echo "Usage: $0 [-h|--help] " + echo "Options:" + echo " -h, --help Show this help message" +} + +# Check if no arguments were provided. If so, display help message +if [ $# -eq 0 ]; then + show_help + exit 1 +fi + +# Parse command line options. Only -h/--help is handled; any other option is skipped +while [[ "$1" =~ ^- && ! "$1" == "--" ]]; do case $1 in + -h | --help ) + show_help + exit 0 + ;; +esac; shift; done +if [[ "$1" == '--' ]]; then shift; fi + +# Check if a folder is provided. If not, display error message. +if [ -z "$1" ]; then + echo "Error: No folder provided" + show_help + exit 1 +fi + +# Check if the folder exists +if [ -d "$1" ]; then + bun ../../../cli/src/cli.ts \ + server \ + --mount=../../:/wordpress/wp-content/plugins/data-liberation \ + --mount=$1:/wordpress/wp-content/uploads/import-wxr \ + --blueprint=./blueprint-import-wxr.json +else + echo "Error: Folder '$1' does not exist" + exit 1 +fi diff --git a/packages/playground/data-liberation/bootstrap.php b/packages/playground/data-liberation/bootstrap.php index b99b02fd31..eb6d45c096 100644 --- a/packages/playground/data-liberation/bootstrap.php +++ b/packages/playground/data-liberation/bootstrap.php @@ -52,6 +52,7 @@ require_once __DIR__ . '/src/import/WP_File_Visitor_Event.php'; require_once __DIR__ . '/src/import/WP_Imported_Entity.php'; require_once __DIR__ . '/src/import/WP_Attachment_Downloader.php'; +require_once __DIR__ . '/src/import/WP_Attachment_Downloader_Event.php'; require_once __DIR__ .
'/src/import/WP_Stream_Importer.php'; require_once __DIR__ . '/src/import/WP_Markdown_Importer.php'; diff --git a/packages/playground/data-liberation/phpunit.xml b/packages/playground/data-liberation/phpunit.xml index 6581bdaa36..800b55f189 100644 --- a/packages/playground/data-liberation/phpunit.xml +++ b/packages/playground/data-liberation/phpunit.xml @@ -10,6 +10,7 @@ tests/URLParserWHATWGComplianceTests.php tests/WPXMLProcessorTests.php tests/UrldecodeNTests.php + tests/WPStreamImporterTests.php diff --git a/packages/playground/data-liberation/plugin.php b/packages/playground/data-liberation/plugin.php index 3a0f15f7a4..5f383b4a69 100644 --- a/packages/playground/data-liberation/plugin.php +++ b/packages/playground/data-liberation/plugin.php @@ -25,39 +25,23 @@ return []; }); -/** - * Development debug code to run the import manually. - * @TODO: Remove this in favor of a CLI command. - */ add_action('init', function() { - return; - $wxr_path = __DIR__ . '/tests/fixtures/wxr-simple.xml'; - $importer = WP_Stream_Importer::create_for_wxr_file( - $wxr_path - ); - while($importer->next_step()) { - // ... + if ( defined( 'WP_CLI' ) && WP_CLI ) { + /** + * Import a WXR file. + * + * + * : The WXR file to import. + */ + $command = function ( $args, $assoc_args ) { + $file = $args[0]; + data_liberation_import( $file ); + }; + + // Register the WP-CLI import command. 
+ // Example usage: wp data-liberation /path/to/file.xml + WP_CLI::add_command( 'data-liberation', $command ); } - return; - $importer->next_step(); - $paused_importer_state = $importer->get_reentrancy_cursor(); - - echo "\n\n"; - echo "moving to importer2\n"; - echo "\n\n"; - - $importer2 = WP_Stream_Importer::create_for_wxr_file( - $wxr_path, - array(), - $paused_importer_state - ); - $importer2->next_step(); - $importer2->next_step(); - $importer2->next_step(); - // $importer2->next_step(); - // var_dump($importer2); - - die("YAY"); }); // Register admin menu diff --git a/packages/playground/data-liberation/project.json b/packages/playground/data-liberation/project.json index 815d255522..44c3e7f5a5 100644 --- a/packages/playground/data-liberation/project.json +++ b/packages/playground/data-liberation/project.json @@ -50,6 +50,16 @@ ], "parallel": false } + }, + "test:wp-phpunit": { + "executor": "nx:run-commands", + "options": { + "cwd": "packages/playground/data-liberation", + "commands": [ + "bun ../cli/src/cli.ts run-blueprint --quiet --mount=./:/wordpress/wp-content/plugins/data-liberation --blueprint=./tests/import/blueprint-import.json" + ], + "parallel": false + } } } } diff --git a/packages/playground/data-liberation/src/functions.php b/packages/playground/data-liberation/src/functions.php index 0c7cfec22f..de37da5bac 100644 --- a/packages/playground/data-liberation/src/functions.php +++ b/packages/playground/data-liberation/src/functions.php @@ -191,3 +191,37 @@ function wp_visit_file_tree( $dir ) { new SplFileInfo( $dir ) ); } + +/** + * Import a WXR file. Used by the CLI. + * + * @param string $path The path to the WXR file. + * @return bool False when no importer could be created for the file, true once the import loop has finished. + */ +function data_liberation_import( $path ): bool { + $importer = WP_Stream_Importer::create_for_wxr_file( $path ); + + if ( !
$importer ) { + return false; + } + + $is_wp_cli = defined( 'WP_CLI' ) && WP_CLI; + + if ( $is_wp_cli ) { + WP_CLI::line( "Importing from {$path}" ); + } + + while ( $importer->next_step() ) { + // Output the current stage if running in WP-CLI. + if ( $is_wp_cli ) { + $current_stage = $importer->get_current_stage(); + WP_CLI::line( "Import: stage {$current_stage}" ); + } + } + + if ( $is_wp_cli ) { + WP_CLI::success( 'Import ended' ); + } + + return true; +} diff --git a/packages/playground/data-liberation/src/import/WP_Attachment_Downloader.php b/packages/playground/data-liberation/src/import/WP_Attachment_Downloader.php index a6be3e74f0..131c568a90 100644 --- a/packages/playground/data-liberation/src/import/WP_Attachment_Downloader.php +++ b/packages/playground/data-liberation/src/import/WP_Attachment_Downloader.php @@ -26,7 +26,7 @@ public function has_pending_requests() { public function enqueue_if_not_exists( $url, $output_path ) { $this->enqueued_resource_id = null; - $output_path = $this->output_root . '/' . ltrim( $output_path, '/' ); + $output_path = ltrim( $output_path, '/' ); if ( file_exists( $output_path ) ) { // @TODO: Reconsider the return value. The enqueuing operation failed, // but overall already having a file seems like a success. @@ -99,13 +99,14 @@ public function poll() { if ( ! $this->client->await_next_event() ) { return false; } - $event = $this->client->get_event(); - $request = $this->client->get_request(); - // The request object we get from the client may be a redirect. - // Let's keep referring to the original request. - $original_request_id = $request->original_request()->id; - while ( true ) { + do { + $event = $this->client->get_event(); + $request = $this->client->get_request(); + // The request object we get from the client may be a redirect. + // Let's keep referring to the original request. + $original_request_id = $this->client->get_request()->original_request()->id; + switch ( $event ) { case Client::EVENT_GOT_HEADERS: if ( ! 
$request->is_redirected() ) { @@ -129,7 +130,7 @@ public function poll() { fclose( $this->fps[ $original_request_id ] ); } if ( isset( $this->output_paths[ $original_request_id ] ) ) { - $partial_file = $this->output_root . '/' . $this->output_paths[ $original_request_id ] . '.partial'; + $partial_file = $this->output_paths[ $original_request_id ] . '.partial'; if ( file_exists( $partial_file ) ) { unlink( $partial_file ); } @@ -162,7 +163,7 @@ public function poll() { } break; } - } + } while ( $this->client->await_next_event() ); return true; } diff --git a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php index cc4ea76c30..a1d625386d 100644 --- a/packages/playground/data-liberation/src/import/WP_Stream_Importer.php +++ b/packages/playground/data-liberation/src/import/WP_Stream_Importer.php @@ -185,6 +185,15 @@ public function next_step() { } } + /** + * Get the current stage. + * + * @return string + */ + public function get_current_stage() { + return $this->stage; + } + /** * Advance the cursor to the oldest finished download. 
For example: * @@ -400,7 +409,7 @@ private function enqueue_attachment_download( string $raw_url, $context_path = n $enqueued = $this->downloader->enqueue_if_not_exists( $url, $output_path ); if ( $enqueued ) { - $resource_id = $this->downloader->get_last_enqueued_resource_id(); + $resource_id = $this->downloader->get_enqueued_resource_id(); $entity_cursor = $this->entity_iterator->get_reentrancy_cursor(); $this->active_downloads[ $entity_cursor ][ $resource_id ] = true; } diff --git a/packages/playground/data-liberation/tests/WPStreamImporterTests.php b/packages/playground/data-liberation/tests/WPStreamImporterTests.php new file mode 100644 index 0000000000..f99113ec07 --- /dev/null +++ b/packages/playground/data-liberation/tests/WPStreamImporterTests.php @@ -0,0 +1,23 @@ +markTestSkipped( 'Test only runs in Playground' ); + } + } + + public function test_import_simple_wxr() { + $import = data_liberation_import( __DIR__ . '/wxr/small-export.xml' ); + + $this->assertTrue( $import ); + } +} diff --git a/packages/playground/data-liberation/tests/import/blueprint-import.json b/packages/playground/data-liberation/tests/import/blueprint-import.json index d34478b3aa..5e383a2eb2 100644 --- a/packages/playground/data-liberation/tests/import/blueprint-import.json +++ b/packages/playground/data-liberation/tests/import/blueprint-import.json @@ -1,15 +1,14 @@ { "$schema": "../../../blueprints/public/blueprint-schema.json", - "constants": { - "WP_DEBUG": true, - "WP_DEBUG_DISPLAY": true, - "WP_DEBUG_LOG": true - }, "login": true, "steps": [ { "step": "activatePlugin", "pluginPath": "data-liberation/plugin.php" + }, + { + "step": "runPHP", + "code": "run($arguments);\nif ( $res !== 0 ) {\ntrigger_error('PHPUnit failed', E_USER_ERROR);\n}\n} catch (Throwable $e) {\ntrigger_error('PHPUnit failed: ' . 
$e->getMessage(), E_USER_ERROR);\n};" } ] } diff --git a/packages/playground/data-liberation/tests/import/run.sh b/packages/playground/data-liberation/tests/import/run.sh index 46bf1f196a..92190c011c 100644 --- a/packages/playground/data-liberation/tests/import/run.sh +++ b/packages/playground/data-liberation/tests/import/run.sh @@ -4,4 +4,4 @@ bun ../../../cli/src/cli.ts \ server \ --mount=../../:/wordpress/wp-content/plugins/data-liberation \ --mount=../../../../docs:/wordpress/wp-content/docs \ - --blueprint=/Users/cloudnik/www/Automattic/core/plugins/playground/packages/playground/data-liberation/tests/import/blueprint-import.json + --blueprint=./blueprint-import.json