Skip to content

Commit

Permalink
[Data Liberation] Add WXR import CLI script (#2012)
Browse files Browse the repository at this point in the history
  • Loading branch information
zaerl authored Nov 23, 2024
1 parent 349a179 commit 4438d72
Show file tree
Hide file tree
Showing 12 changed files with 175 additions and 47 deletions.
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
{
"$schema": "../../../blueprints/public/blueprint-schema.json",
"constants": {
"WP_DEBUG": true,
"WP_DEBUG_LOG": true
},
"login": true,
"steps": [
{
"step": "activatePlugin",
"pluginPath": "data-liberation/plugin.php"
},
{
"step": "runPHP",
"code": "<?php require_once 'wordpress/wp-load.php';\n$upload_dir = wp_upload_dir();\nforeach ( wp_visit_file_tree( $upload_dir['basedir'] . '/import-wxr' ) as $event ) {\nforeach ( $event->files as $file ) {\nif ( $file->isFile() && pathinfo( $file->getPathname(), PATHINFO_EXTENSION ) === 'xml' ) {\ndata_liberation_import( $file->getPathname() );\n}\n}\n};"
}
]
}
48 changes: 48 additions & 0 deletions packages/playground/data-liberation/bin/import/import-wxr.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
#!/bin/bash
#
# A script that accepts a folder and imports all WXR files into a WordPress site
#
# Usage:
# ./import-wxr.sh <folder-name>
#

# Print the usage summary and the list of supported options to stdout.
show_help() {
	printf '%s\n' \
		"Usage: $0 [-h|--help] <folder-name>" \
		"Options:" \
		" -h, --help Show this help message"
}

# Check if no arguments were provided. If so, display the help message
# and exit with a non-zero status.
if [ $# -eq 0 ]; then
	show_help
	exit 1
fi

# Parse command line options. Every leading argument that starts with "-"
# is treated as an option until a literal "--" (end of options) or the
# first positional argument is reached.
while [[ "$1" =~ ^- && ! "$1" == "--" ]]; do
	case $1 in
		-h | --help )
			show_help
			exit 0
			;;
		* )
			# Reject unknown options instead of silently dropping them,
			# so a typo such as "--hlep" is reported to the user.
			echo "Error: Unknown option '$1'" >&2
			show_help >&2
			exit 1
			;;
	esac
	# Only reached by options that do not exit (none at the moment).
	shift
done
# Drop the end-of-options marker; a folder whose name starts with "-" can
# be passed after it.
if [[ "$1" == '--' ]]; then shift; fi

# Check that a folder was provided. If not, display an error message.
if [ -z "$1" ]; then
	echo "Error: No folder provided" >&2
	show_help >&2
	exit 1
fi

# Check that the folder exists, then run the CLI's `server` command with the
# plugin directory and the given folder mounted into the WordPress tree.
#
# NOTE: the CLI entry point and the blueprint are referenced by relative
# paths, so this script has to be started from the directory it lives in.
if [ -d "$1" ]; then
	# "$1" is quoted so that folder paths containing spaces or glob
	# characters are passed to the CLI as a single --mount argument.
	bun ../../../cli/src/cli.ts \
		server \
		--mount=../../:/wordpress/wp-content/plugins/data-liberation \
		--mount="$1:/wordpress/wp-content/uploads/import-wxr" \
		--blueprint=./blueprint-import-wxr.json
else
	echo "Error: Folder '$1' does not exist" >&2
	exit 1
fi
1 change: 1 addition & 0 deletions packages/playground/data-liberation/bootstrap.php
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@
require_once __DIR__ . '/src/import/WP_File_Visitor_Event.php';
require_once __DIR__ . '/src/import/WP_Imported_Entity.php';
require_once __DIR__ . '/src/import/WP_Attachment_Downloader.php';
require_once __DIR__ . '/src/import/WP_Attachment_Downloader_Event.php';
require_once __DIR__ . '/src/import/WP_Stream_Importer.php';
require_once __DIR__ . '/src/import/WP_Markdown_Importer.php';

Expand Down
1 change: 1 addition & 0 deletions packages/playground/data-liberation/phpunit.xml
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
<file>tests/URLParserWHATWGComplianceTests.php</file>
<file>tests/WPXMLProcessorTests.php</file>
<file>tests/UrldecodeNTests.php</file>
<file>tests/WPStreamImporterTests.php</file>
</testsuite>
</testsuites>
</phpunit>
46 changes: 15 additions & 31 deletions packages/playground/data-liberation/plugin.php
Original file line number Diff line number Diff line change
Expand Up @@ -25,39 +25,23 @@
return [];
});

/**
* Development debug code to run the import manually.
* @TODO: Remove this in favor of a CLI command.
*/
add_action('init', function() {
return;
$wxr_path = __DIR__ . '/tests/fixtures/wxr-simple.xml';
$importer = WP_Stream_Importer::create_for_wxr_file(
$wxr_path
);
while($importer->next_step()) {
// ...
if ( defined( 'WP_CLI' ) && WP_CLI ) {
/**
* Import a WXR file.
*
* <file>
* : The WXR file to import.
*/
$command = function ( $args, $assoc_args ) {
$file = $args[0];
data_liberation_import( $file );
};

// Register the WP-CLI import command.
// Example usage: wp data-liberation /path/to/file.xml
WP_CLI::add_command( 'data-liberation', $command );
}
return;
$importer->next_step();
$paused_importer_state = $importer->get_reentrancy_cursor();

echo "\n\n";
echo "moving to importer2\n";
echo "\n\n";

$importer2 = WP_Stream_Importer::create_for_wxr_file(
$wxr_path,
array(),
$paused_importer_state
);
$importer2->next_step();
$importer2->next_step();
$importer2->next_step();
// $importer2->next_step();
// var_dump($importer2);

die("YAY");
});

// Register admin menu
Expand Down
10 changes: 10 additions & 0 deletions packages/playground/data-liberation/project.json
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,16 @@
],
"parallel": false
}
},
"test:wp-phpunit": {
"executor": "nx:run-commands",
"options": {
"cwd": "packages/playground/data-liberation",
"commands": [
"bun ../cli/src/cli.ts run-blueprint --quiet --mount=./:/wordpress/wp-content/plugins/data-liberation --blueprint=./tests/import/blueprint-import.json"
],
"parallel": false
}
}
}
}
34 changes: 34 additions & 0 deletions packages/playground/data-liberation/src/functions.php
Original file line number Diff line number Diff line change
Expand Up @@ -191,3 +191,37 @@ function wp_visit_file_tree( $dir ) {
new SplFileInfo( $dir )
);
}

/**
 * Import a WXR file. Used by the CLI.
 *
 * Creates a WP_Stream_Importer for the given file and keeps calling
 * next_step() until it returns a falsy value. When running under WP-CLI,
 * the file being imported, the stage reported after every step, and a final
 * message are printed.
 *
 * @param string $path The path to the WXR file.
 * @return bool False when no importer could be created for $path,
 *              true once the import loop has finished.
 */
function data_liberation_import( $path ): bool {
	$is_wp_cli = defined( 'WP_CLI' ) && WP_CLI;

	$importer = WP_Stream_Importer::create_for_wxr_file( $path );

	if ( ! $importer ) {
		// Tell CLI users why nothing happened instead of failing silently.
		// A warning (not an error) is used so the calling process keeps running.
		if ( $is_wp_cli ) {
			WP_CLI::warning( "Could not create an importer for {$path}" );
		}

		return false;
	}

	if ( $is_wp_cli ) {
		WP_CLI::line( "Importing from {$path}" );
	}

	while ( $importer->next_step() ) {
		// Output the current stage if running in WP-CLI.
		if ( $is_wp_cli ) {
			$current_stage = $importer->get_current_stage();
			WP_CLI::line( "Import: stage {$current_stage}" );
		}
	}

	if ( $is_wp_cli ) {
		WP_CLI::success( 'Import ended' );
	}

	return true;
}
Original file line number Diff line number Diff line change
Expand Up @@ -26,7 +26,7 @@ public function has_pending_requests() {
public function enqueue_if_not_exists( $url, $output_path ) {
$this->enqueued_resource_id = null;

$output_path = $this->output_root . '/' . ltrim( $output_path, '/' );
$output_path = ltrim( $output_path, '/' );
if ( file_exists( $output_path ) ) {
// @TODO: Reconsider the return value. The enqueuing operation failed,
// but overall already having a file seems like a success.
Expand Down Expand Up @@ -99,13 +99,14 @@ public function poll() {
if ( ! $this->client->await_next_event() ) {
return false;
}
$event = $this->client->get_event();
$request = $this->client->get_request();
// The request object we get from the client may be a redirect.
// Let's keep referring to the original request.
$original_request_id = $request->original_request()->id;

while ( true ) {
do {
$event = $this->client->get_event();
$request = $this->client->get_request();
// The request object we get from the client may be a redirect.
// Let's keep referring to the original request.
$original_request_id = $this->client->get_request()->original_request()->id;

switch ( $event ) {
case Client::EVENT_GOT_HEADERS:
if ( ! $request->is_redirected() ) {
Expand All @@ -129,7 +130,7 @@ public function poll() {
fclose( $this->fps[ $original_request_id ] );
}
if ( isset( $this->output_paths[ $original_request_id ] ) ) {
$partial_file = $this->output_root . '/' . $this->output_paths[ $original_request_id ] . '.partial';
$partial_file = $this->output_paths[ $original_request_id ] . '.partial';
if ( file_exists( $partial_file ) ) {
unlink( $partial_file );
}
Expand Down Expand Up @@ -162,7 +163,7 @@ public function poll() {
}
break;
}
}
} while ( $this->client->await_next_event() );

return true;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -185,6 +185,15 @@ public function next_step() {
}
}

/**
* Get the current stage.
*
* Returns the value of $this->stage as-is; nothing is modified.
* NOTE(review): presumably this identifies the phase the importer is
* currently in — confirm against next_step(), which is not fully visible here.
*
* @return string The value stored in $this->stage (no return type is
*                declared on the method, so this is not enforced).
*/
public function get_current_stage() {
return $this->stage;
}

/**
* Advance the cursor to the oldest finished download. For example:
*
Expand Down Expand Up @@ -400,7 +409,7 @@ private function enqueue_attachment_download( string $raw_url, $context_path = n

$enqueued = $this->downloader->enqueue_if_not_exists( $url, $output_path );
if ( $enqueued ) {
$resource_id = $this->downloader->get_last_enqueued_resource_id();
$resource_id = $this->downloader->get_enqueued_resource_id();
$entity_cursor = $this->entity_iterator->get_reentrancy_cursor();
$this->active_downloads[ $entity_cursor ][ $resource_id ] = true;
}
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
<?php

use PHPUnit\Framework\TestCase;

/**
 * Smoke test for importing a WXR file through data_liberation_import().
 *
 * Every test in this class is skipped unless $_SERVER['SERVER_SOFTWARE']
 * is exactly 'PHP.wasm'.
 */
class WPStreamImporterTests extends TestCase {

	/**
	 * Skip the test unless the server software reports itself as PHP.wasm.
	 */
	protected function setUp(): void {
		parent::setUp();

		// A missing (or null) entry is treated the same as a non-matching one.
		$server_software = $_SERVER['SERVER_SOFTWARE'] ?? null;
		if ( 'PHP.wasm' === $server_software ) {
			return;
		}

		$this->markTestSkipped( 'Test only runs in Playground' );
	}

	/**
	 * Importing the small WXR fixture must report success.
	 */
	public function test_import_simple_wxr() {
		$fixture_path = __DIR__ . '/wxr/small-export.xml';

		$this->assertTrue( data_liberation_import( $fixture_path ) );
	}
}
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
{
"$schema": "../../../blueprints/public/blueprint-schema.json",
"constants": {
"WP_DEBUG": true,
"WP_DEBUG_DISPLAY": true,
"WP_DEBUG_LOG": true
},
"login": true,
"steps": [
{
"step": "activatePlugin",
"pluginPath": "data-liberation/plugin.php"
},
{
"step": "runPHP",
"code": "<?php require_once 'wordpress/wp-load.php'; $base = '/wordpress/wp-content/plugins/data-liberation/';\nrequire $base . 'vendor/autoload.php';\ntry {\n$arguments = [\n'--stderr',\n'--configuration', $base . 'phpunit.xml'\n];\n$res = (new PHPUnit\\TextUI\\Application())->run($arguments);\nif ( $res !== 0 ) {\ntrigger_error('PHPUnit failed', E_USER_ERROR);\n}\n} catch (Throwable $e) {\ntrigger_error('PHPUnit failed: ' . $e->getMessage(), E_USER_ERROR);\n};"
}
]
}
2 changes: 1 addition & 1 deletion packages/playground/data-liberation/tests/import/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -4,4 +4,4 @@ bun ../../../cli/src/cli.ts \
server \
--mount=../../:/wordpress/wp-content/plugins/data-liberation \
--mount=../../../../docs:/wordpress/wp-content/docs \
--blueprint=/Users/cloudnik/www/Automattic/core/plugins/playground/packages/playground/data-liberation/tests/import/blueprint-import.json
--blueprint=./blueprint-import.json

0 comments on commit 4438d72

Please sign in to comment.