Upload solutions for exercises_A

49f1bfc2 · Franziska Niemeyer · e90f9b4a · 49f1bfc2
Commit 49f1bfc2 authored 2 years ago by Franziska Niemeyer
--- a/Exercises/solutions/Python_course_2021_exercises_A.ipynb
+++ b/Exercises/solutions/Python_course_2021_exercises_A.ipynb
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "Python_course_2021_exercises_A.ipynb",
+      "provenance": [],
+      "collapsed_sections": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "D9wuSivclqKR"
+      },
+      "source": [
+        "# Python course 2021 - Exercises A"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def print_type(variable):\n",
+        "  print(variable)\n",
+        "  print(type(variable))"
+      ],
+      "metadata": {
+        "id": "f1I3gDh_zTCK"
+      },
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "8nq-mSErlstG"
+      },
+      "source": [
+        "## Part1 - Variables"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Dc_9QuyRmFB0"
+      },
+      "source": [
+        "\n",
+        "\n",
+        "---\n",
+        "\n",
+        "\n",
+        "1.1) Save 3.14159265359 in a variable of type float!"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "t0cbYO8ymZZZ",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "cfb043ee-09d0-4a94-af82-4d0e0d5182fd"
+      },
+      "source": [
+        "pi = 3.14159265359\n",
+        "print_type(pi)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "3.14159265359\n",
+            "<class 'float'>\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "8GnX41irmahZ"
+      },
+      "source": [
+        "\n",
+        "\n",
+        "---\n",
+        "\n",
+        "1.2) Convert variable from float to integer!\n",
+        "\n",
+        "\n",
+        "\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "IF5LB4a1mrIq",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "83acbab4-9a14-46ea-b889-1ec73523ae15"
+      },
+      "source": [
+        "pi = int(pi)\n",
+        "print_type(pi)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "3\n",
+            "<class 'int'>\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Wpg5yXPMmx9S"
+      },
+      "source": [
+        "\n",
+        "\n",
+        "---\n",
+        "\n",
+        "1.3) Convert variable back! What happens?"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "MKBxedZxm4Zz",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "83943224-bcc4-4cdf-b0b0-7086fe1ae54f"
+      },
+      "source": [
+        "pi = float(pi)\n",
+        "print_type(pi)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "3.0\n",
+            "<class 'float'>\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "The conversion to an integer discarded the decimal places (the value was truncated to 3), so converting back to float gives 3.0 instead of the original value."
+      ],
+      "metadata": {
+        "id": "R5wJukl5zBpN"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "tadPKecSm4rq"
+      },
+      "source": [
+        "\n",
+        "\n",
+        "---\n",
+        "1.4) Convert variable type to string!\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "1iqoYMyKm47q",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "5dbd4edb-5961-4f5c-ee43-de37cf23f43e"
+      },
+      "source": [
+        "pi = str(pi)\n",
+        "print_type(pi)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "3.0\n",
+            "<class 'str'>\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "wpSIghwHm5Ej"
+      },
+      "source": [
+        "\n",
+        "\n",
+        "---\n",
+        "\n",
+        "1.5) Save 'Python' in a string variable!"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "N7XlJWFbnGAz",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "4c91df02-85dc-4cf4-8bc8-c65b86159466"
+      },
+      "source": [
+        "python = \"Python\"\n",
+        "print_type(python)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Python\n",
+            "<class 'str'>\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "fBL218wOnGLa"
+      },
+      "source": [
+        "\n",
+        "\n",
+        "---\n",
+        "1.6) Convert variable type to float! What happens?\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "jWenjrpGnLMi",
+        "colab": {
+          "base_uri": "https://localhost:8080/",
+          "height": 165
+        },
+        "outputId": "c744a4c4-97c0-4fbd-b298-76991a22152e"
+      },
+      "source": [
+        "python = float(python)"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "error",
+          "ename": "ValueError",
+          "evalue": "ignored",
+          "traceback": [
+            "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
+            "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
+            "\u001b[0;32m<ipython-input-13-fc22f6f198d4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpython\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpython\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m",
+            "\u001b[0;31mValueError\u001b[0m: could not convert string to float: 'Python'"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "FB_ZFKiNnLVj"
+      },
+      "source": [
+        "\n",
+        "\n",
+        "---\n",
+        "\n",
+        "\n",
+        "1.7) What is a pitfall with regard to division when working with int/float?"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "a = 3\n",
+        "b = 2\n",
+        "\n",
+        "print(a / b)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Xr1jtrtgGyk1",
+        "outputId": "f4b2673b-b728-4436-ce29-f214c93d194f"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "1.5\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "I4ZTk_1wnVYq"
+      },
+      "source": [
+        "You need to ensure that no integer division is performed if you do not want to lose the decimal places. In Python 3 you do not need to worry about this, because `/` always performs true division (integer division is the separate `//` operator)."
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "O5qjeyUhniCs"
+      },
+      "source": [
+        "## Part2 - Functions"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "luO5CdEcnxrz"
+      },
+      "source": [
+        "Primer: 'ATGCCATGCATTCGACTACG'"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "91zd9CgHn4f8"
+      },
+      "source": [
+        "\n",
+        "\n",
+        "---\n",
+        "\n",
+        "\n",
+        "2.1) Calculate length of primer and print it!"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "k3FggWgun-cW",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "608304ce-982e-408a-a104-184b56e447b3"
+      },
+      "source": [
+        "primer = \"ATGCCATGCATTCGACTACG\"\n",
+        "print(len(primer))"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "20\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "vilS9kFun-o6"
+      },
+      "source": [
+        "\n",
+        "\n",
+        "---\n",
+        "\n",
+        "2.2) Get number of 'G's and print it!"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "qTtPu44JoFWb",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "e0bd126d-2904-459a-efc2-a917e3771a75"
+      },
+      "source": [
+        "positions = [i for i in range(len(primer)) if primer[i] == 'G']\n",
+        "print(positions)\n",
+        "print(len(positions))"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "[2, 7, 13, 19]\n",
+            "4\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "RhxbY3ReoFeE"
+      },
+      "source": [
+        "\n",
+        "\n",
+        "---\n",
+        "2.3) Write a function to analyze the nucleotide composition of a primer and print it!\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "hPT9ciehoNvl",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "58c947a3-498d-4529-ff28-28628ab196c4"
+      },
+      "source": [
+        "def analyze_composition(seq):\n",
+        "  gc_content = seq.count(\"G\") + seq.count(\"C\")\n",
+        "  return 100 * gc_content/len(seq)\n",
+        "\n",
+        "print(\"GC content:\", round(analyze_composition(primer), 2), '%')"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "GC content: 50.0 %\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "nxYFsZTfoN5y"
+      },
+      "source": [
+        "\n",
+        "\n",
+        "---\n",
+        "2.4) Is it a suitable primer? Why (not)?\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def compute_primer_properties(primer):\n",
+        "  length = len(primer)\n",
+        "  print(f\"Length: {length}\")\n",
+        "\n",
+        "  gc_content = primer.count(\"G\") + primer.count(\"C\")\n",
+        "  gc_content = gc_content / length\n",
+        "  print(f\"GC content: {gc_content * 100} %\")\n",
+        "\n",
+        "  temperature = 4 * (primer.count(\"G\") + primer.count(\"C\")) + 2*(primer.count(\"A\") + primer.count(\"T\"))\n",
+        "  print(f\"Temperature: {temperature} degrees celsius\")\n",
+        "\n",
+        "  gc_clamp = (primer[-1] == \"G\" or primer[-1] == \"C\") and (primer[-2] == \"G\" or primer[-2] == \"C\")\n",
+        "  print(f\"GC clamp: {gc_clamp}\")\n",
+        "\n",
+        "compute_primer_properties(primer)"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "46tktXhRbSgF",
+        "outputId": "256748b6-cdda-48a5-973b-1435aacd6d6d"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Length: 20\n",
+            "GC content: 50.0 %\n",
+            "Temperature: 60 degrees celsius\n",
+            "GC clamp: True\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "The primer's properties are all in a suitable range. However, to evaluate the actual suitability of the primer, its mapping uniqueness and mapping capability to the site of interest are also relevant."
+      ],
+      "metadata": {
+        "id": "f5SE2eaBsLEF"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "**Additional exercises**"
+      ],
+      "metadata": {
+        "id": "Yapp44wzin7Z"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "2.5) Test if the primer contains a hairpin structure."
+      ],
+      "metadata": {
+        "id": "OQplpGvZieNZ"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def get_reverse_complement(sequence):\n",
+        "  bases = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}\n",
+        "\n",
+        "  rev_comp = []\n",
+        "  for i in range(len(sequence)-1, -1, -1):\n",
+        "    rev_comp += [bases[sequence[i]]]\n",
+        "\n",
+        "  return ''.join(rev_comp)\n",
+        "\n",
+        "\n",
+        "\"\"\"\n",
+        "Computes all exact matches between seq and other_seq\n",
+        "\n",
+        "this method is naive and can be optimized\n",
+        "output is a list of 4-tuples of the form\n",
+        "(start position in seq, start position in other_seq, length of the match, matching string)\n",
+        "\"\"\"\n",
+        "def get_common_substrings(min_length, seq, other_seq):\n",
+        "  length_seq = len(seq)\n",
+        "  length_other_seq = len(other_seq)\n",
+        "  matches = []\n",
+        "\n",
+        "  for i in range(length_seq):\n",
+        "    for j in range(length_other_seq):\n",
+        "      current_position_seq = i\n",
+        "      current_position_other_seq = j\n",
+        "      current_match_length = 0\n",
+        "      while(current_position_seq < length_seq and current_position_other_seq < length_other_seq):\n",
+        "        if seq[current_position_seq] == other_seq[current_position_other_seq]:\n",
+        "          current_position_seq += 1\n",
+        "          current_position_other_seq += 1\n",
+        "          current_match_length += 1\n",
+        "        else:\n",
+        "          break\n",
+        "      if current_match_length >= min_length:\n",
+        "        matches += [(i, j, current_match_length, seq[i:i+current_match_length])]\n",
+        "\n",
+        "  return matches\n",
+        "\n",
+        "\"\"\"\n",
+        "Tests whether a given sequence contains a hairpin structure\n",
+        "\n",
+        "min_length describes the minimum length of the stem of the hairpin\n",
+        "min_distance describes the minimum length of the loop of the hairpin\n",
+        "\"\"\"\n",
+        "def has_hairpin_structure(sequence, min_length, min_distance):\n",
+        "  length_seq = len(sequence)\n",
+        "\n",
+        "  upper = min_distance\n",
+        "  lower = 0 - min_distance\n",
+        "\n",
+        "  rev_comp = get_reverse_complement(sequence)\n",
+        "  matches = get_common_substrings(min_length, sequence, rev_comp)\n",
+        "\n",
+        "  for seq_position, rev_comp_position, match_length, _ in matches:\n",
+        "    # find start position of second match in sequence from position in reverse complement\n",
+        "    start_position_second_match = length_seq - rev_comp_position - match_length\n",
+        "    # print(start_position_second_match)\n",
+        "\n",
+        "    # end position of first match in sequence\n",
+        "    end_position_first_match = seq_position + match_length\n",
+        "    # print(end_position_first_match)\n",
+        "\n",
+        "    # positions need to be at least min_distance apart\n",
+        "    if end_position_first_match - start_position_second_match <= lower or end_position_first_match - start_position_second_match >= upper:\n",
+        "      return True\n",
+        "  \n",
+        "  return False\n",
+        "\n",
+        "print(primer)  \n",
+        "print(get_reverse_complement(primer))\n",
+        "print(get_common_substrings(3, primer, get_reverse_complement(primer)))\n",
+        "print(has_hairpin_structure(primer, 3, 3))"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "Bz3QGmNBkx9D",
+        "outputId": "43fb551d-b47b-4db0-df9d-ff993a6d85b4"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "ATGCCATGCATTCGACTACG\n",
+            "CGTAGTCGAATGCATGGCAT\n",
+            "[(0, 9, 4, 'ATGC'), (0, 13, 3, 'ATG'), (1, 10, 3, 'TGC'), (4, 12, 4, 'CATG'), (4, 17, 3, 'CAT'), (5, 9, 6, 'ATGCAT'), (5, 13, 3, 'ATG'), (6, 10, 5, 'TGCAT'), (7, 11, 4, 'GCAT'), (7, 16, 4, 'GCAT'), (8, 12, 3, 'CAT'), (8, 17, 3, 'CAT'), (11, 5, 4, 'TCGA'), (12, 6, 3, 'CGA')]\n",
+            "True\n"
+          ]
+        }
+      ]
+    }
+  ]
+}
\ No newline at end of file
+%% Cell type:markdown id: tags:
+
+# Python course 2021 - Exercises A
+
+%% Cell type:code id: tags:
+
+``` 
+def print_type(variable):
+  """Print a value on one line and its type on the next line."""
+  for item in (variable, type(variable)):
+    print(item)
+```
+
+%% Cell type:markdown id: tags:
+
+## Part1 - Variables
+
+%% Cell type:markdown id: tags:
+
+
+
+---
+
+
+1.1) Save 3.14159265359 in a variable of type float!
+
+%% Cell type:code id: tags:
+
+``` 
+pi = 3.14159265359  # a numeric literal with a decimal point is stored as a float
+print_type(pi)
+```
+
+%% Output
+
+    3.14159265359
+    <class 'float'>
+
+%% Cell type:markdown id: tags:
+
+
+
+---
+
+1.2) Convert variable from float to integer!
+
+
+
+
+%% Cell type:code id: tags:
+
+``` 
+pi = int(pi)  # int() drops the fractional part: 3.14159265359 becomes 3
+print_type(pi)
+```
+
+%% Output
+
+    3
+    <class 'int'>
+
+%% Cell type:markdown id: tags:
+
+
+
+---
+
+1.3) Convert variable back! What happens?
+
+%% Cell type:code id: tags:
+
+``` 
+pi = float(pi)  # yields 3.0; the digits dropped by int() are not recovered
+print_type(pi)
+```
+
+%% Output
+
+    3.0
+    <class 'float'>
+
+%% Cell type:markdown id: tags:
+
+The conversion to an integer discarded the decimal places (the value was truncated to 3), so converting back to float gives 3.0 instead of the original value.
+
+%% Cell type:markdown id: tags:
+
+
+
+---
+1.4) Convert variable type to string!
+
+%% Cell type:code id: tags:
+
+``` 
+pi = str(pi)  # text representation of the float, i.e. the string '3.0'
+print_type(pi)
+```
+
+%% Output
+
+    3.0
+    <class 'str'>
+
+%% Cell type:markdown id: tags:
+
+
+
+---
+
+1.5) Save 'Python' in a string variable!
+
+%% Cell type:code id: tags:
+
+``` 
+python = "Python"  # a quoted literal is a str
+print_type(python)
+```
+
+%% Output
+
+    Python
+    <class 'str'>
+
+%% Cell type:markdown id: tags:
+
+
+
+---
+1.6) Convert variable type to float! What happens?
+
+%% Cell type:code id: tags:
+
+``` 
+python = float(python)  # intentionally raises ValueError: 'Python' is not numeric text
+```
+
+%% Output
+
+    ---------------------------------------------------------------------------
+    ValueError                                Traceback (most recent call last)
+    <ipython-input-13-fc22f6f198d4> in <module>()
+    ----> 1 python = float(python)
+
+    ValueError: could not convert string to float: 'Python'
+
+%% Cell type:markdown id: tags:
+
+
+
+---
+
+
+1.7) What is a pitfall with regard to division when working with int/float?
+
+%% Cell type:code id: tags:
+
+``` 
+a = 3
+b = 2
+
+print(a / b)  # "/" on two ints is true division in Python 3 and yields the float 1.5
+```
+
+%% Output
+
+    1.5
+
+%% Cell type:markdown id: tags:
+
+You need to ensure that no integer division is performed if you do not want to lose the decimal places. In Python 3 you do not need to worry about this, because `/` always performs true division (integer division is the separate `//` operator).
+
+%% Cell type:markdown id: tags:
+
+## Part2 - Functions
+
+%% Cell type:markdown id: tags:
+
+Primer: 'ATGCCATGCATTCGACTACG'
+
+%% Cell type:markdown id: tags:
+
+
+
+---
+
+
+2.1) Calculate length of primer and print it!
+
+%% Cell type:code id: tags:
+
+``` 
+primer = "ATGCCATGCATTCGACTACG"  # example primer reused by the following cells
+print(len(primer))  # number of characters (nucleotides) in the primer
+```
+
+%% Output
+
+    20
+
+%% Cell type:markdown id: tags:
+
+
+
+---
+
+2.2) Get number of 'G's and print it!
+
+%% Cell type:code id: tags:
+
+``` 
+positions = [i for i in range(len(primer)) if primer[i] == 'G']  # 0-based indices of every 'G'
+print(positions)
+print(len(positions))  # number of 'G's; primer.count('G') gives the same number
+```
+
+%% Output
+
+    [2, 7, 13, 19]
+    4
+
+%% Cell type:markdown id: tags:
+
+
+
+---
+2.3) Write a function to analyze the nucleotide composition of a primer and print it!
+
+%% Cell type:code id: tags:
+
+``` 
+def analyze_composition(seq):
+  """Return the GC content of seq as a percentage between 0 and 100.
+
+  Only upper-case 'G' and 'C' are counted. An empty seq raises
+  ZeroDivisionError because the count is divided by len(seq).
+  """
+  gc_total = sum(seq.count(base) for base in ("G", "C"))
+  return 100 * gc_total / len(seq)
+
+print("GC content:", round(analyze_composition(primer), 2), '%')  # rounded to two decimals for display only
+```
+
+%% Output
+
+    GC content: 50.0 %
+
+%% Cell type:markdown id: tags:
+
+
+
+---
+2.4) Is it a suitable primer? Why (not)?
+
+%% Cell type:code id: tags:
+
+``` 
+def compute_primer_properties(primer):
+  """Print length, GC content, melting-temperature estimate and GC clamp of a primer.
+
+  The temperature estimate adds 4 for every G/C and 2 for every A/T.
+  The GC clamp is reported as True when both of the last two bases are G or C.
+  Only upper-case letters are counted; nothing is returned.
+  """
+  strong_bases = ("G", "C")
+
+  length = len(primer)
+  print(f"Length: {length}")
+
+  gc_count = sum(primer.count(base) for base in strong_bases)
+  gc_fraction = gc_count / length
+  print(f"GC content: {gc_fraction * 100} %")
+
+  at_count = primer.count("A") + primer.count("T")
+  temperature = 4 * gc_count + 2 * at_count
+  print(f"Temperature: {temperature} degrees celsius")
+
+  # the second-to-last base is only inspected when the last base is G or C
+  gc_clamp = primer[-1] in strong_bases and primer[-2] in strong_bases
+  print(f"GC clamp: {gc_clamp}")
+
+compute_primer_properties(primer)  # prints the report; the function itself returns None
+```
+
+%% Output
+
+    Length: 20
+    GC content: 50.0 %
+    Temperature: 60 degrees celsius
+    GC clamp: True
+
+%% Cell type:markdown id: tags:
+
+The primer's properties are all in a suitable range. However, to evaluate the actual suitability of the primer, its mapping uniqueness and mapping capability to the site of interest are also relevant.
+
+%% Cell type:markdown id: tags:
+
+**Additional exercises**
+
+%% Cell type:markdown id: tags:
+
+2.5) Test if the primer contains a hairpin structure.
+
+%% Cell type:code id: tags:
+
+``` 
+def get_reverse_complement(sequence):
+  """Return the reverse complement of an upper-case DNA sequence.
+
+  Any character other than A, C, G or T raises KeyError.
+  """
+  complement = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
+  return ''.join(complement[base] for base in reversed(sequence))
+
+
+def get_common_substrings(min_length, seq, other_seq):
+  """Compute all exact matches between seq and other_seq.
+
+  For every pair of start positions the match is extended to the right for as
+  long as the characters agree; matches shorter than min_length are dropped.
+  This method is naive and can be optimized.
+
+  Returns a list of 4-tuples of the form
+  (start position in seq, start position in other_seq, length of the match, matching string).
+  """
+  length_seq = len(seq)
+  length_other_seq = len(other_seq)
+  matches = []
+
+  for i in range(length_seq):
+    for j in range(length_other_seq):
+      # longest run that still fits into both sequences from these start positions
+      max_match_length = min(length_seq - i, length_other_seq - j)
+      current_match_length = 0
+      while (current_match_length < max_match_length
+             and seq[i + current_match_length] == other_seq[j + current_match_length]):
+        current_match_length += 1
+      if current_match_length >= min_length:
+        matches.append((i, j, current_match_length, seq[i:i + current_match_length]))
+
+  return matches
+
+def has_hairpin_structure(sequence, min_length, min_distance):
+  """Test whether a given sequence contains a hairpin structure.
+
+  A hairpin needs two reverse-complementary stretches of the sequence (the
+  stem) that are separated by unpaired bases (the loop).
+
+  min_length describes the minimum length of the stem of the hairpin
+  min_distance describes the minimum length of the loop of the hairpin
+
+  Returns True if at least one stem/loop combination satisfies both limits,
+  otherwise False.
+  """
+  length_seq = len(sequence)
+
+  rev_comp = get_reverse_complement(sequence)
+  matches = get_common_substrings(min_length, sequence, rev_comp)
+
+  for seq_position, rev_comp_position, match_length, _ in matches:
+    # start position of the partner stretch in sequence, derived from the
+    # position of the match in the reverse complement
+    start_position_second_match = length_seq - rev_comp_position - match_length
+
+    # The k-th base of the match pairs with the k-th base counted from the END
+    # of the partner stretch, so keeping only the outermost min_length pairs
+    # leaves the widest possible loop for this match.
+    end_position_first_arm = seq_position + min_length
+    start_position_second_arm = start_position_second_match + match_length - min_length
+
+    # A negative value means the arms overlap or are in the wrong order (e.g. a
+    # palindrome matching itself); the mirrored match covers the other order.
+    loop_length = start_position_second_arm - end_position_first_arm
+    if loop_length >= min_distance:
+      return True
+
+  return False
+
+print(primer)
+print(get_reverse_complement(primer))
+print(get_common_substrings(3, primer, get_reverse_complement(primer)))  # matches of length >= 3 against the reverse complement
+print(has_hairpin_structure(primer, 3, 3))  # stem of at least 3 bases, loop of at least 3 bases
+```
+
+%% Output
+
+    ATGCCATGCATTCGACTACG
+    CGTAGTCGAATGCATGGCAT
+    [(0, 9, 4, 'ATGC'), (0, 13, 3, 'ATG'), (1, 10, 3, 'TGC'), (4, 12, 4, 'CATG'), (4, 17, 3, 'CAT'), (5, 9, 6, 'ATGCAT'), (5, 13, 3, 'ATG'), (6, 10, 5, 'TGCAT'), (7, 11, 4, 'GCAT'), (7, 16, 4, 'GCAT'), (8, 12, 3, 'CAT'), (8, 17, 3, 'CAT'), (11, 5, 4, 'TCGA'), (12, 6, 3, 'CGA')]
+    True