diff --git a/Exercises/solutions/Python_course_2021_exercises_A.ipynb b/Exercises/solutions/Python_course_2021_exercises_A.ipynb new file mode 100644 index 0000000000000000000000000000000000000000..c5f728be60a141b989d37c34d4c1a8d1b48d6a90 --- /dev/null +++ b/Exercises/solutions/Python_course_2021_exercises_A.ipynb @@ -0,0 +1,650 @@ +{ + "nbformat": 4, + "nbformat_minor": 0, + "metadata": { + "colab": { + "name": "Python_course_2021_exercises_A.ipynb", + "provenance": [], + "collapsed_sections": [] + }, + "kernelspec": { + "name": "python3", + "display_name": "Python 3" + } + }, + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "D9wuSivclqKR" + }, + "source": [ + "# Python course 2021 - Exercises A" + ] + }, + { + "cell_type": "code", + "source": [ + "def print_type(variable):\n", + "    \"\"\"Print a value and, on the next line, its type.\"\"\"\n", + "    print(variable, type(variable), sep=\"\\n\")" + ], + "metadata": { + "id": "f1I3gDh_zTCK" + }, + "execution_count": null, + "outputs": [] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8nq-mSErlstG" + }, + "source": [ + "## Part1 - Variables" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Dc_9QuyRmFB0" + }, + "source": [ + "\n", + "\n", + "---\n", + "\n", + "\n", + "1.1) Save 3.14159265359 in a variable of type float!" 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "t0cbYO8ymZZZ", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "cfb043ee-09d0-4a94-af82-4d0e0d5182fd" + }, + "source": [ + "pi = 3.14159265359\n", + "print_type(pi)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "3.14159265359\n", + "<class 'float'>\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "8GnX41irmahZ" + }, + "source": [ + "\n", + "\n", + "---\n", + "\n", + "1.2) Convert variable from float to integer!\n", + "\n", + "\n", + "\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "IF5LB4a1mrIq", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "83acbab4-9a14-46ea-b889-1ec73523ae15" + }, + "source": [ + "pi = int(pi)\n", + "print_type(pi)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "3\n", + "<class 'int'>\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Wpg5yXPMmx9S" + }, + "source": [ + "\n", + "\n", + "---\n", + "\n", + "1.3) Convert variable back! What happens?" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "MKBxedZxm4Zz", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "83943224-bcc4-4cdf-b0b0-7086fe1ae54f" + }, + "source": [ + "pi = float(pi)\n", + "print_type(pi)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "3.0\n", + "<class 'float'>\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "Converting to int truncates the float toward zero (3.14159265359 becomes 3), so the decimal places are lost. Converting back to float gives 3.0; the original value is not restored." 
+ ], + "metadata": { + "id": "R5wJukl5zBpN" + } + }, + { + "cell_type": "markdown", + "metadata": { + "id": "tadPKecSm4rq" + }, + "source": [ + "\n", + "\n", + "---\n", + "1.4) Convert variable type to string!\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "1iqoYMyKm47q", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "5dbd4edb-5961-4f5c-ee43-de37cf23f43e" + }, + "source": [ + "pi = str(pi)\n", + "print_type(pi)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "3.0\n", + "<class 'str'>\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "wpSIghwHm5Ej" + }, + "source": [ + "\n", + "\n", + "---\n", + "\n", + "1.5) Save 'Python' in a string variable!" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "N7XlJWFbnGAz", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "4c91df02-85dc-4cf4-8bc8-c65b86159466" + }, + "source": [ + "python = \"Python\"\n", + "print_type(python)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Python\n", + "<class 'str'>\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fBL218wOnGLa" + }, + "source": [ + "\n", + "\n", + "---\n", + "1.6) Convert variable type to float! 
What happens?\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "jWenjrpGnLMi", + "colab": { + "base_uri": "https://localhost:8080/", + "height": 165 + }, + "outputId": "c744a4c4-97c0-4fbd-b298-76991a22152e" + }, + "source": [ + "python = float(python)" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "error", + "ename": "ValueError", + "evalue": "ignored", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mValueError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m<ipython-input-13-fc22f6f198d4>\u001b[0m in \u001b[0;36m<module>\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpython\u001b[0m \u001b[0;34m=\u001b[0m \u001b[0mfloat\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0mpython\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m", + "\u001b[0;31mValueError\u001b[0m: could not convert string to float: 'Python'" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "FB_ZFKiNnLVj" + }, + "source": [ + "\n", + "\n", + "---\n", + "\n", + "\n", + "1.7) What is a pitfall in regards to division when working with int/float?" + ] + }, + { + "cell_type": "code", + "source": [ + "a = 3\n", + "b = 2\n", + "\n", + "print(a / b)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Xr1jtrtgGyk1", + "outputId": "f4b2673b-b728-4436-ce29-f214c93d194f" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "1.5\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "I4ZTk_1wnVYq" + }, + "source": [ + "You need to ensure that no integer division is performed if you do not want to lose the decimal places. In Python 3 the `/` operator always performs true division (3 / 2 gives 1.5), so decimal places are only dropped when you explicitly use floor division `//` (3 // 2 gives 1)." 
+ ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "O5qjeyUhniCs" + }, + "source": [ + "## Part2 - Functions" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "luO5CdEcnxrz" + }, + "source": [ + "Primer: 'ATGCCATGCATTCGACTACG'" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "91zd9CgHn4f8" + }, + "source": [ + "\n", + "\n", + "---\n", + "\n", + "\n", + "2.1) Calculate length of primer and print it!" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "k3FggWgun-cW", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "608304ce-982e-408a-a104-184b56e447b3" + }, + "source": [ + "primer = \"ATGCCATGCATTCGACTACG\"\n", + "print(len(primer))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "20\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "vilS9kFun-o6" + }, + "source": [ + "\n", + "\n", + "---\n", + "\n", + "2.2) Get number of 'G's and print it!" 
+ ] + }, + { + "cell_type": "code", + "metadata": { + "id": "qTtPu44JoFWb", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "e0bd126d-2904-459a-efc2-a917e3771a75" + }, + "source": [ + "positions = [i for i in range(len(primer)) if primer[i] == 'G']\n", + "print(positions)\n", + "print(len(positions))" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "[2, 7, 13, 19]\n", + "4\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "RhxbY3ReoFeE" + }, + "source": [ + "\n", + "\n", + "---\n", + "2.3) Write a function to analyze the nucleotide composition of a primer and print it!\n" + ] + }, + { + "cell_type": "code", + "metadata": { + "id": "hPT9ciehoNvl", + "colab": { + "base_uri": "https://localhost:8080/" + }, + "outputId": "58c947a3-498d-4529-ff28-28628ab196c4" + }, + "source": [ + "def analyze_composition(seq):\n", + "    \"\"\"Return the GC content of a nucleotide sequence as a percentage.\n", + "\n", + "    Bases are counted case-insensitively. An empty sequence yields 0.0\n", + "    instead of raising ZeroDivisionError.\n", + "    \"\"\"\n", + "    if not seq:\n", + "        return 0.0\n", + "    bases = seq.upper()\n", + "    gc_count = bases.count(\"G\") + bases.count(\"C\")\n", + "    return 100 * gc_count / len(seq)\n", + "\n", + "print(\"GC content:\", round(analyze_composition(primer), 2), '%')" + ], + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "GC content: 50.0 %\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nxYFsZTfoN5y" + }, + "source": [ + "\n", + "\n", + "---\n", + "2.4) Is it a suitable primer? 
Why (not)?\n" + ] + }, + { + "cell_type": "code", + "source": [ + "def compute_primer_properties(primer):\n", + "    \"\"\"Print length, GC content, melting temperature and GC clamp of a primer.\n", + "\n", + "    The temperature is estimated as 4 * (G + C) + 2 * (A + T).\n", + "    The GC clamp is True when both of the last two bases are G or C.\n", + "    Empty and single-base primers are reported without raising an error.\n", + "    \"\"\"\n", + "    length = len(primer)\n", + "    print(f\"Length: {length}\")\n", + "\n", + "    # count each base group once and reuse the counts below\n", + "    gc_count = primer.count(\"G\") + primer.count(\"C\")\n", + "    at_count = primer.count(\"A\") + primer.count(\"T\")\n", + "\n", + "    # guard the division: an empty primer must not raise ZeroDivisionError\n", + "    gc_content = 100 * gc_count / length if length else 0.0\n", + "    print(f\"GC content: {round(gc_content, 2)} %\")\n", + "\n", + "    temperature = 4 * gc_count + 2 * at_count\n", + "    print(f\"Temperature: {temperature} degrees celsius\")\n", + "\n", + "    # a clamp needs two bases; primer[-2] would raise IndexError on shorter input\n", + "    gc_clamp = length >= 2 and all(base in \"GC\" for base in primer[-2:])\n", + "    print(f\"GC clamp: {gc_clamp}\")\n", + "\n", + "compute_primer_properties(primer)" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "46tktXhRbSgF", + "outputId": "256748b6-cdda-48a5-973b-1435aacd6d6d" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "Length: 20\n", + "GC content: 50.0 %\n", + "Temperature: 60 degrees celsius\n", + "GC clamp: True\n" + ] + } + ] + }, + { + "cell_type": "markdown", + "source": [ + "The primer's properties are all in a suitable range. However, to evaluate the actual suitability of the primer, its mapping uniqueness and mapping capability to the site of interest are also relevant." + ], + "metadata": { + "id": "f5SE2eaBsLEF" + } + }, + { + "cell_type": "markdown", + "source": [ + "**Additional exercises**" + ], + "metadata": { + "id": "Yapp44wzin7Z" + } + }, + { + "cell_type": "markdown", + "source": [ + "2.5) Test if the primer contains a hairpin structure." 
+ ], + "metadata": { + "id": "OQplpGvZieNZ" + } + }, + { + "cell_type": "code", + "source": [ + "def get_reverse_complement(sequence):\n", + "    \"\"\"Return the reverse complement of an uppercase DNA sequence.\n", + "\n", + "    Raises KeyError for any character other than A, C, G or T.\n", + "    \"\"\"\n", + "    bases = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}\n", + "    return ''.join(bases[base] for base in reversed(sequence))\n", + "\n", + "\n", + "def get_common_substrings(min_length, seq, other_seq):\n", + "    \"\"\"Compute all exact matches between seq and other_seq.\n", + "\n", + "    This method is naive and can be optimized. For every pair of start\n", + "    positions the match is extended as far as possible and kept if it is\n", + "    at least min_length long. The output is a list of 4-tuples of the form\n", + "    (start position in seq, start position in other_seq, length of the match, matching string).\n", + "    \"\"\"\n", + "    length_seq = len(seq)\n", + "    length_other_seq = len(other_seq)\n", + "    matches = []\n", + "\n", + "    for i in range(length_seq):\n", + "        for j in range(length_other_seq):\n", + "            match_length = 0\n", + "            while (i + match_length < length_seq\n", + "                   and j + match_length < length_other_seq\n", + "                   and seq[i + match_length] == other_seq[j + match_length]):\n", + "                match_length += 1\n", + "            if match_length >= min_length:\n", + "                matches.append((i, j, match_length, seq[i:i + match_length]))\n", + "\n", + "    return matches\n", + "\n", + "\n", + "def has_hairpin_structure(sequence, min_length, min_distance):\n", + "    \"\"\"Test whether a given sequence contains a hairpin structure.\n", + "\n", + "    min_length describes the minimum length of the stem of the hairpin.\n", + "    min_distance describes the minimum length of the loop of the hairpin.\n", + "    \"\"\"\n", + "    length_seq = len(sequence)\n", + "    rev_comp = get_reverse_complement(sequence)\n", + "    matches = get_common_substrings(min_length, sequence, rev_comp)\n", + "\n", + "    for seq_position, rev_comp_position, _, _ in matches:\n", + "        # use only the first min_length bases of the match as stem: the\n", + "        # shortest allowed stem leaves the longest loop for these positions\n", + "        end_position_first_stem = seq_position + min_length\n", + "\n", + "        # start position of the pairing stem in sequence, derived from the\n", + "        # position of the match in the reverse complement\n", + "        start_position_second_stem = length_seq - rev_comp_position - min_length\n", + "\n", + "        # the second stem has to start at least min_distance bases after the\n", + "        # first stem ends; measuring in this direction only rejects overlapping\n", + "        # stems and palindromes that leave no room for a loop\n", + "        if start_position_second_stem - end_position_first_stem >= min_distance:\n", + "            return True\n", + "\n", + "    return False\n", + "\n", + "print(primer) \n", + "print(get_reverse_complement(primer))\n", + "print(get_common_substrings(3, primer, get_reverse_complement(primer)))\n", + "print(has_hairpin_structure(primer, 3, 3))" + ], + "metadata": { + "colab": { + "base_uri": "https://localhost:8080/" + }, + "id": "Bz3QGmNBkx9D", + "outputId": "43fb551d-b47b-4db0-df9d-ff993a6d85b4" + }, + "execution_count": null, + "outputs": [ + { + "output_type": "stream", + "name": "stdout", + "text": [ + "ATGCCATGCATTCGACTACG\n", + "CGTAGTCGAATGCATGGCAT\n", + "[(0, 9, 4, 'ATGC'), (0, 13, 3, 'ATG'), (1, 10, 3, 'TGC'), (4, 12, 4, 'CATG'), (4, 17, 3, 'CAT'), (5, 9, 6, 'ATGCAT'), (5, 13, 3, 'ATG'), (6, 10, 5, 'TGCAT'), (7, 11, 4, 'GCAT'), (7, 16, 4, 'GCAT'), (8, 12, 3, 'CAT'), (8, 17, 3, 'CAT'), (11, 5, 4, 'TCGA'), (12, 6, 3, 'CGA')]\n", + "True\n" + ] + } + ] + } + ] +} \ No newline at end of file