diff --git a/admin/acceptance.py b/admin/acceptance.py
index 17f7f8c5ea..67dbdba959 100644
--- a/admin/acceptance.py
+++ b/admin/acceptance.py
@@ -92,11 +92,6 @@ def get_trial_environment(cluster):
         'FLOCKER_ACCEPTANCE_VOLUME_BACKEND': cluster.dataset_backend.name,
         'FLOCKER_ACCEPTANCE_API_CERTIFICATES_PATH':
             cluster.certificates_path.path,
-        'FLOCKER_ACCEPTANCE_HOSTNAME_TO_PUBLIC_ADDRESS': json.dumps({
-            node.private_address: node.address
-            for node in cluster.agent_nodes
-            if node.private_address is not None
-        }),
         'FLOCKER_ACCEPTANCE_DEFAULT_VOLUME_SIZE': bytes(
             cluster.default_volume_size
         ),
diff --git a/docs/releasenotes/index.rst b/docs/releasenotes/index.rst
index 980bcebd15..5488753478 100644
--- a/docs/releasenotes/index.rst
+++ b/docs/releasenotes/index.rst
@@ -16,6 +16,7 @@ Next Release
 * The new :ref:`CloudFormation installer ` has been made available, to provide a far simpler installation experience for users on AWS.
 * The :ref:`Flocker plugin for Docker ` should support the direct volume listing and inspection functionality added to Docker 1.10.
 * Fixed a regression that caused block device agents to poll backend APIs like EBS too frequently in some circumstances.
+* Datasets can now be moved off of EC2 instances that have been shut down. Previously they could only be moved off of terminated instances.
 
 This Release
 ============
diff --git a/flocker/acceptance/endtoend/test_dataset.py b/flocker/acceptance/endtoend/test_dataset.py
index 4e6fe36402..39b773e23b 100644
--- a/flocker/acceptance/endtoend/test_dataset.py
+++ b/flocker/acceptance/endtoend/test_dataset.py
@@ -5,9 +5,11 @@
 """
 
 from uuid import UUID
-from unittest import SkipTest, skipIf
+from unittest import SkipTest
 from datetime import timedelta
 
+from ipaddress import ip_address
+
 from testtools import run_test_with
 
 from twisted.internet import reactor
@@ -18,7 +20,7 @@
 
 from ..testtools import (
     require_cluster, require_moving_backend, create_dataset, DatasetBackend,
-    get_backend_api, verify_socket
+    get_backend_api, verify_socket, is_public_ip,
 )
 
 
@@ -109,18 +111,19 @@ def not_exists():
             created.addCallback(delete_dataset)
         return created
 
-    @skipIf(True,
-            "Shutting down a node invalidates a public IP, which breaks all "
-            "kinds of things. So skip for now.")
     @require_moving_backend
     @run_test_with(async_runner(timeout=timedelta(minutes=6)))
-    @require_cluster(2)
-    def test_dataset_move_from_dead_node(self, cluster):
+    # Rackspace is buggy so this doesn't work there; once we're off
+    # Rackspace we should re-enable it for everything (FLOC-4001):
+    @require_cluster(2, required_backend=DatasetBackend.aws)
+    def test_dataset_move_from_dead_node(self, cluster, backend):
         """
         A dataset can be moved from a dead node to a live node.
 
         All attributes, including the maximum size, are preserved.
         """
+        # We could use the backend argument, but we're going to drop it as
+        # part of FLOC-4001, so create our own.
         api = get_backend_api(self, cluster.cluster_uuid)
         if not ICloudAPI.providedBy(api):
             raise SkipTest(
@@ -139,9 +142,18 @@ def test_dataset_move_from_dead_node(self, cluster):
 
         def startup_node(node_id):
             api.start_node(node_id)
-            # Wait for node to boot up:; we presume Flocker getting going after
-            # SSH is available will be pretty quick:
-            return loop_until(reactor, verify_socket(node.public_address, 22))
+            # Wait for node to boot up:
+            d = loop_until(
+                reactor, lambda: node_id in api.list_live_nodes())
+
+            # Wait for it to be accessible over SSH, on the theory that this
+            # means it's ready to be used:
+            def wait_for_ssh(_):
+                ips = [ip_address(i) for i in api.list_live_nodes()[node_id]]
+                public_ip = [unicode(i) for i in ips if is_public_ip(i)][0]
+                return verify_socket(public_ip, 22)
+            d.addCallback(wait_for_ssh)
+            return d
 
         # Once created, shut down origin node and then request to move the
         # dataset to node2:
diff --git a/flocker/acceptance/endtoend/test_installer.py b/flocker/acceptance/endtoend/test_installer.py
index 60f2bf38ea..92b49e4995 100644
--- a/flocker/acceptance/endtoend/test_installer.py
+++ b/flocker/acceptance/endtoend/test_installer.py
@@ -326,7 +326,6 @@ def _cleanup_flocker(self):
             control_node=self.control_node_ip.encode('ascii'),
             certificates_path=local_certs_path,
             num_agent_nodes=2,
-            hostname_to_public_address={},
             username='user1',
         )
         d.addCallback(
diff --git a/flocker/acceptance/testtools.py b/flocker/acceptance/testtools.py
index 85a4d56342..5d40b87f3c 100644
--- a/flocker/acceptance/testtools.py
+++ b/flocker/acceptance/testtools.py
@@ -18,6 +18,8 @@
 
 from docker.tls import TLSConfig
 
+from ipaddress import ip_address
+
 from twisted.web.http import OK, CREATED
 from twisted.python.filepath import FilePath
 from twisted.python.constants import Names, NamedConstant
@@ -45,6 +47,7 @@
 from ..apiclient import FlockerClient, DatasetState
 from ..node.script import get_backend, get_api
 from ..node import dockerpy_client
+from ..node.agents.blockdevice import ICloudAPI
 
 from .node_scripts import SCRIPTS as NODE_SCRIPTS
 
@@ -826,7 +829,7 @@ def get_file(self, node, path):
 
 def connected_cluster(
     reactor, control_node, certificates_path, num_agent_nodes,
-    hostname_to_public_address, username='user',
+    username='user',
 ):
     cluster_cert = certificates_path.child(b"cluster.crt")
     user_cert = certificates_path.child(
@@ -869,24 +872,63 @@ def failed_query(failure):
             failed_query)
         return d
     agents_connected = loop_until(reactor, nodes_available)
+    agents_connected.addCallback(lambda _: _add_nodes(cluster))
+    return agents_connected
+
+
+def is_public_ip(ip):
+    """
+    :param IPAddress ip: An IP address.
+    :return: ``True`` if ``ip`` is a public address, otherwise ``False``.
+    """
+    return not any([ip.is_private, ip.is_link_local, ip.is_loopback])
+
+
+def _add_nodes(cluster):
+    """
+    Configure the ``Node`` objects for a newly created ``Cluster`` whose
+    nodes are known to be alive.
+
+    :param Cluster cluster: Cluster that still needs nodes set.
+    :return: ``cluster`` updated with appropriate ``nodes`` set.
+    """
+    # By default we just trust the address returned by Flocker:
+    def default_get_public_ip(address):
+        return address
 
-    # Extract node hostnames from API that lists nodes. Currently we
-    # happen know these in advance, but in FLOC-1631 node identification
-    # will switch to UUIDs instead.
-    agents_connected.addCallback(lambda _: cluster.current_nodes())
+    try:
+        backend = get_backend_api(None, cluster.cluster_uuid)
+    except SkipTest:
+        # Can't load backend, will have to trust Flocker's reported IPs.
+        get_public_ip = default_get_public_ip
+    else:
+        if ICloudAPI.providedBy(backend):
+            node_ips = list(set(ip_address(i) for i in ips)
+                            for ips in backend.list_live_nodes().values())
+
+            def get_public_ip(address):
+                for ips in node_ips:
+                    if ip_address(address) in ips:
+                        return [unicode(ip) for ip in ips
+                                if is_public_ip(ip)][0]
+                raise ValueError(
+                    "Couldn't find address in cloud API reported IPs")
+        else:
+            get_public_ip = default_get_public_ip
 
     def node_from_dict(node):
         reported_hostname = node["host"]
-        public_address = hostname_to_public_address.get(
-            reported_hostname, reported_hostname)
+        public_address = get_public_ip(reported_hostname)
         return Node(
             uuid=node[u"uuid"],
             public_address=public_address.encode("ascii"),
             reported_hostname=reported_hostname.encode("ascii"),
         )
-    agents_connected.addCallback(lambda nodes: cluster.set(
-        "nodes", map(node_from_dict, nodes)))
-    return agents_connected
+
+    d = cluster.current_nodes()
+    d.addCallback(
+        lambda nodes: cluster.set("nodes", map(node_from_dict, nodes)))
+    return d
 
 
 def _get_test_cluster(reactor):
@@ -914,16 +956,11 @@ def _get_test_cluster(reactor):
     certificates_path = FilePath(
         environ["FLOCKER_ACCEPTANCE_API_CERTIFICATES_PATH"])
 
-    hostname_to_public_address_env_var = environ.get(
-        "FLOCKER_ACCEPTANCE_HOSTNAME_TO_PUBLIC_ADDRESS", "{}")
-    hostname_to_public_address = json.loads(hostname_to_public_address_env_var)
-
     return connected_cluster(
         reactor,
         control_node,
         certificates_path,
         num_agent_nodes,
-        hostname_to_public_address
     )
 
 
diff --git a/flocker/node/agents/blockdevice.py b/flocker/node/agents/blockdevice.py
index 287486cd76..8eb33f268c 100644
--- a/flocker/node/agents/blockdevice.py
+++ b/flocker/node/agents/blockdevice.py
@@ -1121,8 +1121,10 @@ def list_live_nodes():
         This is used to figure out which nodes are dead, so that other
         nodes can do the detach.
 
-        :returns: A collection of ``unicode`` compute instance IDs, compatible
-            with those returned by ``IBlockDeviceAPI.compute_instance_id``.
+        :returns: A mapping of ``unicode`` compute instance IDs
+            (compatible with those returned by
+            ``IBlockDeviceAPI.compute_instance_id``) to IPs of those
+            nodes, also as ``unicode``.
""" def start_node(node_id): @@ -1647,7 +1649,8 @@ def is_existing_block_device(dataset_id, path): pass if ICloudAPI.providedBy(self._underlying_blockdevice_api): - live_instances = self._underlying_blockdevice_api.list_live_nodes() + live_instances = list( + self._underlying_blockdevice_api.list_live_nodes()) else: # Can't know accurately who is alive and who is dead: live_instances = None diff --git a/flocker/node/agents/cinder.py b/flocker/node/agents/cinder.py index 6b14b0b6c2..dac04d7f48 100644 --- a/flocker/node/agents/cinder.py +++ b/flocker/node/agents/cinder.py @@ -689,8 +689,11 @@ def get_device_path(self, blockdevice_id): # ICloudAPI: def list_live_nodes(self): - return list(server.id for server in self.nova_server_manager.list() - if server.status == u'ACTIVE') + return {server.id: + list(map( + unicode, _extract_nova_server_addresses(server.addresses))) + for server in self.nova_server_manager.list() + if server.status == u'ACTIVE'} def start_node(self, node_id): server = self.nova_server_manager.get(node_id) diff --git a/flocker/node/agents/ebs.py b/flocker/node/agents/ebs.py index 90259fa9eb..ccb0ff915a 100644 --- a/flocker/node/agents/ebs.py +++ b/flocker/node/agents/ebs.py @@ -719,7 +719,7 @@ def _wait_for_volume_state_change(operation, start_time = time.time() poll_until( lambda: _reached_end_state( - operation, volume, update, time.time() - start_time, timeout + operation, volume, update, time.time() - start_time, timeout ), itertools.repeat(1) ) @@ -1363,11 +1363,14 @@ def get_device_path(self, blockdevice_id): def list_live_nodes(self): instances = self.connection.instances.filter( Filters=[{'Name': 'instance-state-name', 'Values': ['running']}]) - return list(unicode(instance.id) for instance in instances) + return {unicode(instance.id): + [unicode(instance.public_ip_address), + unicode(instance.private_ip_address)] + for instance in instances} @boto3_log def start_node(self, node_id): - self.connection.start_instances(instance_ids=[node_id]) + self.connection.instances.filter(InstanceIds=[node_id]).start() def aws_from_configuration( diff --git a/flocker/node/agents/test/test_blockdevice.py b/flocker/node/agents/test/test_blockdevice.py index 5f5e995ea5..197f2857db 100644 --- a/flocker/node/agents/test/test_blockdevice.py +++ b/flocker/node/agents/test/test_blockdevice.py @@ -97,7 +97,7 @@ _backing_file_name, ) from ....common.algebraic import tagged_union_strategy - +from ....common import get_all_ips from ... import run_state_change, in_parallel, ILocalState, IStateChange, NoOp from ...testtools import ( @@ -754,7 +754,12 @@ def __init__(self, block_api, live_nodes=()): self.live_nodes = live_nodes def list_live_nodes(self): - return [self.compute_instance_id()] + list(self.live_nodes) + result = {self.compute_instance_id(): + set(unicode(i) for i in get_all_ips() + if i != b"127.0.0.1")} + result.update({node: [u"10.1.1.{}".format(i)] + for i, node in enumerate(self.live_nodes)}) + return result def start_node(self, node_id): return @@ -5501,11 +5506,25 @@ def test_current_machine_is_live(self): self.assertIn(self.api.compute_instance_id(), live)) return d + def test_current_machine_has_appropriate_ip(self): + """ + The machine's known IP is set for the current node. 
+ """ + local_addresses = set(unicode(i) for i in get_all_ips() + if i != b"127.0.0.1") + d = self.async_cloud_api.list_live_nodes() + d.addCallback( + lambda live: + self.assertTrue( + set(live[self.api.compute_instance_id()]).intersection( + local_addresses))) + return d + def test_list_live_nodes(self): """ ``list_live_nodes`` returns an iterable of unicode values. """ - live_nodes = self.api.list_live_nodes() + live_nodes = list(self.api.list_live_nodes()) self.assertThat(live_nodes, AllMatch(IsInstance(unicode))) return Tests