From bef72f985be8b3b53736fff1fa8bf063eb1f131a Mon Sep 17 00:00:00 2001
From: Franziska Niemeyer <franziska.niemeyer@uni-bielefeld.de>
Date: Mon, 8 Aug 2022 09:27:28 +0200
Subject: [PATCH] Upload solutions for exercises_C

---
 .../Python_course_2021_exercises_C.ipynb      | 338 ++++++++++++++++++
 1 file changed, 338 insertions(+)
 create mode 100644 Exercises/solutions/Python_course_2021_exercises_C.ipynb

diff --git a/Exercises/solutions/Python_course_2021_exercises_C.ipynb b/Exercises/solutions/Python_course_2021_exercises_C.ipynb
new file mode 100644
index 0000000..0ac0325
--- /dev/null
+++ b/Exercises/solutions/Python_course_2021_exercises_C.ipynb
@@ -0,0 +1,338 @@
+{
+  "nbformat": 4,
+  "nbformat_minor": 0,
+  "metadata": {
+    "colab": {
+      "name": "Python_course_2021_exercises_C.ipynb",
+      "provenance": [],
+      "collapsed_sections": []
+    },
+    "kernelspec": {
+      "name": "python3",
+      "display_name": "Python 3"
+    }
+  },
+  "cells": [
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "FyvebZ68I8BJ"
+      },
+      "source": [
+        "# Python course 2021 - Exercises C"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "8PgJ1ymVJCIO"
+      },
+      "source": [
+        "## Part1 - file handling"
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "Ws7tJiEXJG8f"
+      },
+      "source": [
+        "\n",
+        "\n",
+        "---\n",
+        "1.1) Count number of sequences (number of headers) in AtCol0_Exons.fasta!\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "JzgmMxR0JVxL",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "52d24ee3-445c-4b78-9b91-52de52a9791d"
+      },
+      "source": [
+        "from google.colab import drive\n",
+        "drive.mount('/content/drive')"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "Mounted at /content/drive\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "oEJidIAEy8S5"
+      },
+      "source": [
+        "datei = open(\"/content/drive/MyDrive/PythonProgramming/AtCol0_Exons.fasta\", \"r\")\n",
+        "lines = datei.readlines()\n",
+        "datei.close()"
+      ],
+      "execution_count": null,
+      "outputs": []
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "def get_num_headers(lines):\n",
+        "  num_headers = 0\n",
+        "  for line in lines:\n",
+        "    if line:\n",
+        "      if line[0] == \">\":\n",
+        "        num_headers += 1\n",
+        "  return num_headers\n",
+        "\n",
+        "print(get_num_headers(lines))"
+      ],
+      "metadata": {
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "id": "q8s7_9qRxa_b",
+        "outputId": "ad4a42f6-4f24-42bd-8508-0d5e92d59347"
+      },
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "217183\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "rFtHoz5UKujx"
+      },
+      "source": [
+        "\n",
+        "\n",
+        "---\n",
+        "1.2) Count number of sequence lines!\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "AgMttuZlKyBg",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "88d9aa00-aa17-4778-e291-6d70cebfa67b"
+      },
+      "source": [
+        "def get_num_sequence_lines(lines):\n",
+        "  num_sequence_lines = 0\n",
+        "  for line in lines:\n",
+        "    if line:\n",
+        "      if line[0] != \">\":\n",
+        "        num_sequence_lines += 1\n",
+        "  return num_sequence_lines\n",
+        "\n",
+        "print(get_num_sequence_lines(lines))"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "916024\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "YTH3rkjJKyNm"
+      },
+      "source": [
+        "\n",
+        "\n",
+        "---\n",
+        "1.3) Count number of characters in document! (How many per line?)\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "6ECkHsa9K3-X",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "fe3ccf0a-8afa-4b7f-ba19-e3870430989f"
+      },
+      "source": [
+        "def get_num_characters(lines):\n",
+        "  num_characters = 0\n",
+        "  num_lines = 0\n",
+        "  for line in lines:\n",
+        "    num_characters += len(line)\n",
+        "    num_lines += 1\n",
+        "  return (num_characters, num_characters / num_lines)\n",
+        "\n",
+        "print(get_num_characters(lines))"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "(81803755, 72.18783064347467)\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "I9bkusUsK4HV"
+      },
+      "source": [
+        "\n",
+        "\n",
+        "---\n",
+        "1.4) How long are all contained sequences combined?\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "XC4que0hK81W",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "e77692ed-7a33-47ef-d8ee-5ac9550535fe"
+      },
+      "source": [
+        "def get_sequence_length(lines):\n",
+        "  total_sequence_length = 0\n",
+        "  for line in lines:\n",
+        "    if line:\n",
+        "      if line[0] != \">\":\n",
+        "        line = line.strip()\n",
+        "        total_sequence_length += len(line)\n",
+        "  return total_sequence_length\n",
+        "\n",
+        "print(get_sequence_length(lines))"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "64867051\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "metadata": {
+        "id": "5NnxagAWK9AP"
+      },
+      "source": [
+        "\n",
+        "\n",
+        "---\n",
+        "1.5) Calculate the average sequence length in this file!\n"
+      ]
+    },
+    {
+      "cell_type": "code",
+      "metadata": {
+        "id": "MZNV3sNqLB62",
+        "colab": {
+          "base_uri": "https://localhost:8080/"
+        },
+        "outputId": "01b885d6-dc65-4b5d-f893-288015678122"
+      },
+      "source": [
+        "def get_average_sequence_length(lines):\n",
+        "  return get_sequence_length(lines) / get_num_headers(lines)\n",
+        "\n",
+        "print(get_average_sequence_length(lines))"
+      ],
+      "execution_count": null,
+      "outputs": [
+        {
+          "output_type": "stream",
+          "name": "stdout",
+          "text": [
+            "298.67462462531597\n"
+          ]
+        }
+      ]
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "**Additional exercises**"
+      ],
+      "metadata": {
+        "id": "n9rZsJ5_4hTJ"
+      }
+    },
+    {
+      "cell_type": "markdown",
+      "source": [
+        "1.6) Parse the fasta file entry-wise. An entry consists of a header and the corresponding sequence (which may comprise multiple lines). The result should be a list of tuples of the form (header, sequence)."
+      ],
+      "metadata": {
+        "id": "ItrnPkVE5fsv"
+      }
+    },
+    {
+      "cell_type": "code",
+      "source": [
+        "\"\"\"\n",
+        "Parse a fasta file entry-wise as a list of tuples of the form (header, sequence).\n",
+        "\"\"\"\n",
+        "def read_fasta(file):\n",
+        "  result = []\n",
+        "\n",
+        "  header = None\n",
+        "  sequence = []\n",
+        "  for line in file:\n",
+        "    # remove all whitespace from the ends\n",
+        "    line = line.strip()\n",
+        "    if line.startswith('>'):\n",
+        "      # if you find a header return the previous FASTA block in tuple form after\n",
+        "      # concatenating the sequence lines(if there is a previous block)\n",
+        "      if header:\n",
+        "        result += [(header, ''.join(sequence))]\n",
+        "\n",
+        "      header = line\n",
+        "      sequence = []\n",
+        "    else:\n",
+        "      # current line is not a header\n",
+        "      # add line to the list of sequence lines of the current FASTA block after removing all whitespace from it\n",
+        "      sequence.append(line.translate(str.maketrans('', '', whitespace)))\n",
+        "  \n",
+        "  if header:\n",
+        "    result += [(header, ''.join(sequence))]\n",
+        "  return result"
+      ],
+      "metadata": {
+        "id": "RvF09FlO6YeT"
+      },
+      "execution_count": null,
+      "outputs": []
+    }
+  ]
+}
\ No newline at end of file
-- 
GitLab