Miscellaneous fixes to spring20 (yandexdataschool#360)

* [Week 1] Revert use of Video from IPython since it does not exist in Colab
* [Week 1] Add policy() function and improve style of the Gym Interface notebook
* [Week 4] Improve formatting of the DQN Atari TF notebook
* [Week 7] Fix typo in POMDP TF practice notebook
* [Week 4] Replace broken import of scipy.misc.imresize with skimage.transform.resize
* [Week 6] Style fixes for Reinforce TF notebook
* Add spaces around plus sign in video file creation
* [Week 1] Reformulate gym_interface notebook for seminar compatibility
* [Week 4] Undo more redundant line breaks in Week 4 Atari DQN TF
aps2019project · Apr 12, 2020 · 693465a · 693465a
1 parent a91f8a1
commit 693465a
Show file tree

Hide file tree

Showing 14 changed files with 127 additions and 110 deletions.
diff --git a/week01_intro/deep_crossentropy_method.ipynb b/week01_intro/deep_crossentropy_method.ipynb
@@ -270,12 +270,19 @@
    "outputs": [],
    "source": [
     "# show video\n",
-    "from IPython.display import Video\n",
+    "from IPython.display import HTML\n",
     "import os\n",
     "\n",
-    "video_names = list(filter(lambda s: s.endswith(\".mp4\"), os.listdir(\"./videos/\")))\n",
+    "video_names = [\n",
+    "    s for s in os.listdir(\"./videos/\")\n",
+    "    if s.endswith(\".mp4\")\n",
+    "]\n",
     "\n",
-    "Video(\"./videos/\"+video_names[-1])  # this may or may not be _last_ video. Try other indices"
+    "HTML(\"\"\"\n",
+    "<video width=\"640\" height=\"480\" controls>\n",
+    "  <source src=\"{}\" type=\"video/mp4\">\n",
+    "</video>\n",
+    "\"\"\".format(\"./videos/\" + video_names[-1]))  # this may or may not be the _last_ video. Try other indices"
    ]
   },
   {

diff --git a/week01_intro/seminar_gym_interface.ipynb b/week01_intro/seminar_gym_interface.ipynb
@@ -115,15 +115,11 @@
    "source": [
     "### Play with it\n",
     "\n",
-    "Below is the code that drives the car to the right. \n",
-    "\n",
-    "However, it doesn't reach the flag at the far right due to gravity. \n",
+    "Below is the code that drives the car to the right. However, if you simply use the default policy, the car will not reach the flag at the far right due to gravity.\n",
     "\n",
     "__Your task__ is to fix it. Find a strategy that reaches the flag. \n",
     "\n",
-    "You're not required to build any sophisticated algorithms for now, feel free to hard-code :)\n",
-    "\n",
-    "__Hint__: your action at each step should depend either on `t` or on `s`."
+    "You are not required to build any sophisticated algorithms for now, feel free to hard-code :)"
    ]
   },
   {
@@ -134,25 +130,52 @@
    "source": [
     "from IPython import display\n",
     "\n",
-    "# create env manually to set time limit. Please don't change this.\n",
+    "# Create env manually to set time limit. Please don't change this.\n",
     "TIME_LIMIT = 250\n",
     "env = gym.wrappers.TimeLimit(\n",
     "    gym.envs.classic_control.MountainCarEnv(),\n",
     "    max_episode_steps=TIME_LIMIT + 1,\n",
     ")\n",
-    "s = env.reset()\n",
-    "actions = {'left': 0, 'stop': 1, 'right': 2}\n",
-    "\n",
+    "actions = {'left': 0, 'stop': 1, 'right': 2}"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
+    "def policy(obs, t):\n",
+    "    # Write the code for your policy here. You can use the observation\n",
+    "    # (a tuple of position and velocity), the current time step, or both,\n",
+    "    # if you want.\n",
+    "    position, velocity = obs\n",
+    "    \n",
+    "    # This is an example policy. You can try running it, but it will not work.\n",
+    "    # Your goal is to fix that.\n",
+    "    return actions['right']"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": null,
+   "metadata": {},
+   "outputs": [],
+   "source": [
     "plt.figure(figsize=(4, 3))\n",
     "display.clear_output(wait=True)\n",
     "\n",
+    "obs = env.reset()\n",
     "for t in range(TIME_LIMIT):\n",
     "    plt.gca().clear()\n",
     "    \n",
-    "    # change the line below to reach the flag\n",
-    "    s, r, done, _ = env.step(actions['right'])\n",
+    "    action = policy(obs, t)  # Call your policy\n",
+    "    obs, reward, done, _ = env.step(action)  # Pass the action chosen by the policy to the environment\n",
+    "    \n",
+    "    # We don't do anything with reward here because MountainCar is a very simple environment,\n",
+    "    # and reward is a constant -1. Therefore, your goal is to end the episode as quickly as possible.\n",
     "\n",
-    "    # draw game image on display\n",
+    "    # Draw game image on display.\n",
     "    plt.imshow(env.render('rgb_array'))\n",
     "    \n",
     "    display.clear_output(wait=True)\n",
@@ -173,7 +196,7 @@
    "metadata": {},
    "outputs": [],
    "source": [
-    "assert s[0] > 0.47\n",
+    "assert obs[0] > 0.47\n",
     "print(\"You solved it!\")"
    ]
   }

diff --git a/week04_approx_rl/homework_lasagne.ipynb b/week04_approx_rl/homework_lasagne.ipynb
@@ -480,7 +480,7 @@
     "<video width=\"640\" height=\"480\" controls>\n",
     "  <source src=\"{}\" type=\"video/mp4\">\n",
     "</video>\n",
-    "\"\"\".format(\"./records/\"+video_names[-1]))  # this may or may not be _last_ video. Try other indices"
+    "\"\"\".format(\"./records/\" + video_names[-1]))  # this may or may not be _last_ video. Try other indices"
    ]
   },
   {
@@ -609,7 +609,7 @@
     "<video width=\"640\" height=\"480\" controls>\n",
     "  <source src=\"{}\" type=\"video/mp4\">\n",
     "</video>\n",
-    "\"\"\".format(\"./videos/\"+video_names[-1]))  # this may or may not be _last_ video. Try other indices"
+    "\"\"\".format(\"./videos/\" + video_names[-1]))  # this may or may not be _last_ video. Try other indices"
    ]
   },
   {

diff --git a/week04_approx_rl/homework_pytorch_main.ipynb b/week04_approx_rl/homework_pytorch_main.ipynb
@@ -1154,7 +1154,7 @@
     "<video width=\"640\" height=\"480\" controls>\n",
     "  <source src=\"{}\" type=\"video/mp4\">\n",
     "</video>\n",
-    "\"\"\".format(\"./videos/\"+video_names[-1]))  # this may or may not be _last_ video. Try other indices"
+    "\"\"\".format(\"./videos/\" + video_names[-1]))  # this may or may not be _last_ video. Try other indices"
    ]
   },
   {