diff --git a/3_cascade/1_prototype-cascade.ipynb b/3_cascade/1_prototype-cascade.ipynb index 98df416..6599c99 100644 --- a/3_cascade/1_prototype-cascade.ipynb +++ b/3_cascade/1_prototype-cascade.ipynb @@ -164,7 +164,6 @@ "metadata": {}, "outputs": [], "source": [ - "@dataclass\n", "class CascadeTrajectory:\n", " \"\"\"A class to encasplulate a cascade trajectory\n", "\n", @@ -188,13 +187,16 @@ " self.current_timestep = 0\n", " self.last_trusted_timestep = 0\n", " \n", - " def read(self, index=':', *args, **kwargs):\n", + " def read(self, index=':', *args, **kwargs) -> list[ase.Atoms]:\n", + " \"\"\"Read the trajectory into an iterable of atoms\"\"\"\n", " return read(self.path, *args, index=index, **kwargs)\n", "\n", - " def get_untrusted_segment(self):\n", + " def get_untrusted_segment(self) -> list[ase.Atoms]:\n", + " \"\"\"Return the part of the trajectory that needs to be audited\"\"\"\n", " return read(self.path, index=f'{self.last_trusted_timestep+1}:')\n", " \n", " def trim_untrusted_segment(self):\n", + " \"\"\"Remove the part of a trajectory that failed an audit, updating timesteps as appropriate\"\"\"\n", " # todo: is there a way to do this without loading into memory?\n", " write(self.path, read(self.path, index=f':{self.last_trusted_timestep+1}'))\n", " self.current_timestep = self.last_trusted_timestep\n", @@ -336,18 +338,16 @@ "source": [] }, { - "cell_type": "code", - "execution_count": 14, - "id": "76f8685a-1cfb-4579-8373-c707bb079dc6", + "cell_type": "markdown", + "id": "fffbbce3-a5d2-4f14-9095-261309a069a3", "metadata": {}, - "outputs": [], "source": [ - "from time import perf_counter" + "## Minimum viable cascade loop" ] }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 14, "id": "568bdb51-d2a5-4ff5-9b50-13d4bc111e5f", "metadata": {}, "outputs": [ @@ -430,21 +430,24 @@ } ], "source": [ - "seeds = [0, 1]\n", - "\n", "\n", + "# create two cascade trajectories from the same starting point but with different seeds\n", + 
"seeds = [0, 1]\n", "trajectories = [CascadeTrajectory(path=f'si-diffusion-seed={s}.traj', \n", " starting=atoms.copy()) for s in seeds]\n", + "# notably, right now, the seeds have no effect since our dynamics are NVE\n", "\n", - "done = False\n", - "total_steps = 128\n", + "\n", + "total_steps = 128 # how long will our final trajectories be\n", "increment_steps = 64 # how many steps to run with ML at a time\n", - "done = False\n", - "max_iter = 10\n", - "i = 0\n", + "\n", + "# audits are random\n", "auditor = RandomAuditor(random_state=42)\n", - "threshold = 0.5\n", + "threshold = 0.5 # this is the 'score' threshold on the auditor\n", "\n", + "done = False\n", + "i = 0 # track while loop iterations\n", + "max_iter = 10 # dont go above this\n", "while not done:\n", " \n", " done_ctr = 0 # count how many trajectories are done\n", @@ -491,31 +494,47 @@ " done = done_ctr == len(trajectories) or i == max_iter" ] }, + { + "cell_type": "markdown", + "id": "ea864495-142f-4640-b1c6-eaa341dbf581", + "metadata": {}, + "source": [ + "## did those complete? " + ] + }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 15, "id": "07448032-737c-49ac-9958-2829b95b841e", "metadata": {}, "outputs": [ { "data": { "text/plain": [ - "" + "[129, 129]" ] }, - "execution_count": 19, + "execution_count": 15, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "[len(t.read()), trajectories)" + "[len(t.read()) for t in trajectories]" + ] + }, + { + "cell_type": "markdown", + "id": "96ebda21-bbbd-4e07-842b-d49e8f83ac7e", + "metadata": {}, + "source": [ + "Seems done enough for now" ] }, { "cell_type": "code", "execution_count": null, - "id": "b19a2812-cc62-4bce-9c63-d8b6ba5b1bd4", + "id": "98dfe13d-24ea-4c0a-b9e3-4fed5133b255", "metadata": {}, "outputs": [], "source": []