Skip to content
Snippets Groups Projects
Commit 92ea6476 authored by Franziska Niemeyer's avatar Franziska Niemeyer
Browse files

Upload solutions for exercises_F

parent 946d9cf4
No related branches found
No related tags found
No related merge requests found
%% Cell type:markdown id: tags:
# Python course 2021 - Exercises F
%% Cell type:markdown id: tags:
## Part1 - DNA, RNA and peptide sequences
%% Cell type:markdown id: tags:
---
1.1) Write a function to get the reverse complement (upper case letters) of a DNA sequence given in upper case letters!
%% Cell type:code id: tags:
```
def get_reverse_complement(sequence):
    """Return the reverse complement of an upper-case DNA sequence.

    Args:
        sequence: DNA sequence made up of the letters A, C, G and T.

    Returns:
        The complementary strand read in the opposite direction, as an
        upper-case string.

    Raises:
        KeyError: If ``sequence`` contains any symbol other than A, C, G or T
            (lower-case letters included).
    """
    bases = {'A': 'T', 'T': 'A', 'C': 'G', 'G': 'C'}
    # Walk the sequence back to front and complement each base on the way.
    return ''.join(bases[base] for base in reversed(sequence))
# Example sequences; both names are reused by the following cell.
sequence_1 = "AGACGTA"
sequence_2 = "TTTGACGTAT"
# Print each sequence followed by its reverse complement.
for dna in (sequence_1, sequence_2):
    print(dna)
    print(get_reverse_complement(dna))
```
%% Output
AGACGTA
TACGTCT
TTTGACGTAT
ATACGTCAAA
%% Cell type:markdown id: tags:
---
1.2) Write a function to convert a DNA sequence into an RNA sequence!
%% Cell type:code id: tags:
```
def convert_DNA_to_RNA(sequence):
    """Return the RNA form of a DNA sequence.

    Every upper-case ``T`` is replaced by ``U``; all other characters
    (including a lower-case ``t``) are copied unchanged.

    Args:
        sequence: DNA sequence as a string.

    Returns:
        The sequence with ``T`` replaced by ``U``.
    """
    return sequence.replace("T", "U")
# Print each example sequence followed by its RNA form.
for dna in (sequence_1, sequence_2):
    print(dna)
    print(convert_DNA_to_RNA(dna))
```
%% Output
AGACGTA
AGACGUA
TTTGACGTAT
UUUGACGUAU
%% Cell type:markdown id: tags:
---
1.3) Write a function to translate a DNA sequence into amino acids (first frame only)!
* Tip: [wiki - codon tables](https://en.wikipedia.org/wiki/DNA_and_RNA_codon_tables)
%% Cell type:code id: tags:
```
# Codon table keyed by DNA triplet: maps each of the 64 codons to its
# one-letter amino acid code; the three stop codons map to "*".
codons = {
    "TTT": "F", "TTC": "F",
    "TTA": "L", "TTG": "L", "CTT": "L", "CTC": "L", "CTA": "L", "CTG": "L",
    "ATT": "I", "ATC": "I", "ATA": "I",
    "ATG": "M",
    "GTT": "V", "GTC": "V", "GTA": "V", "GTG": "V",
    "TCT": "S", "TCC": "S", "TCA": "S", "TCG": "S",
    "CCT": "P", "CCC": "P", "CCA": "P", "CCG": "P",
    "ACT": "T", "ACC": "T", "ACA": "T", "ACG": "T",
    "GCT": "A", "GCC": "A", "GCA": "A", "GCG": "A",
    "TAT": "Y", "TAC": "Y",
    "CAT": "H", "CAC": "H",
    "CAA": "Q", "CAG": "Q",
    "AAT": "N", "AAC": "N",
    "AAA": "K", "AAG": "K",
    "GAT": "D", "GAC": "D",
    "GAA": "E", "GAG": "E",
    "TGT": "C", "TGC": "C",
    "TGG": "W",
    "CGT": "R", "CGC": "R", "CGA": "R", "CGG": "R",
    "AGT": "S", "AGC": "S",
    "AGA": "R", "AGG": "R",
    "GGT": "G", "GGC": "G", "GGA": "G", "GGG": "G",
    "TGA": "*", "TAA": "*", "TAG": "*",
}


def translate(sequence):
    """Translate the first reading frame of a DNA sequence into amino acids.

    The sequence is read in consecutive triplets starting at index 0. One or
    two trailing bases that do not form a complete codon are ignored.
    Translation does not stop at stop codons; they appear as ``*`` in the
    result.

    Args:
        sequence: Upper-case DNA sequence as a string.

    Returns:
        The peptide sequence in one-letter amino acid code.

    Raises:
        KeyError: If a triplet is not a key of ``codons``.
    """
    # Stopping at len - 2 guarantees that every slice is a full triplet.
    return ''.join(
        codons[sequence[i:i + 3]] for i in range(0, len(sequence) - 2, 3)
    )
# Example DNA sequence; the name `sequence` is read again by the next cell.
sequence = "ATGCATGGTTGAGGCGGCATGCGTCGCGATTGG"
first_frame_peptide = translate(sequence)
print(first_frame_peptide)
```
%% Output
MHG*GGMRRDW
%% Cell type:markdown id: tags:
---
1.4) Write a function to translate DNA sequences in all 6 frames into peptide sequences! The longest peptide sequence per DNA sequence should be returned!
%% Cell type:code id: tags:
```
def translate_all_frames(sequence):
    """Yield the peptide translation of all six reading frames.

    The three frames of ``sequence`` itself (offsets 0, 1 and 2) are yielded
    first, followed by the three frames of its reverse complement.

    Args:
        sequence: Upper-case DNA sequence as a string.

    Yields:
        One peptide string per reading frame, six in total.
    """
    yield from (translate(sequence[offset:]) for offset in range(3))
    # The reverse complement is only computed once the forward frames have
    # been consumed.
    rev_comp = get_reverse_complement(sequence)
    yield from (translate(rev_comp[offset:]) for offset in range(3))
def get_longest_peptide_sequence(sequence):
    """Return the longest valid peptide contained in a translated frame.

    A valid peptide starts at the first ``M`` after the beginning of the
    frame or after a stop codon (``*``) and extends up to, but not including,
    the next ``*`` or the end of the frame. A peptide that runs to the end of
    the frame without a stop codon therefore still counts.

    Args:
        sequence: Peptide string in one-letter code, with ``*`` marking stop
            codons (the format produced by ``translate``).

    Returns:
        The longest valid peptide, or ``""`` if the frame contains no ``M``.
        If several peptides share the maximum length, the first one wins.
    """
    longest_peptide_sequence = ""
    # Each stretch between stop codons can hold at most one peptide: the part
    # from its first "M" onwards.
    for segment in sequence.split("*"):
        start = segment.find("M")
        if start == -1:
            continue
        candidate = segment[start:]
        # Strict comparison keeps the earliest peptide on ties.
        if len(candidate) > len(longest_peptide_sequence):
            longest_peptide_sequence = candidate
    return longest_peptide_sequence
def longest_peptide_sequence_per_frame(sequence):
    """Yield the longest valid peptide of each of the six reading frames.

    Frames are processed in the order produced by ``translate_all_frames``.

    Args:
        sequence: Upper-case DNA sequence as a string.

    Yields:
        For each frame, the result of ``get_longest_peptide_sequence`` on
        that frame's translation (``""`` for a frame without a valid
        peptide).
    """
    yield from map(get_longest_peptide_sequence, translate_all_frames(sequence))
# 1) Translation of every reading frame of the example sequence.
print("Reading frames:")
for frame in translate_all_frames(sequence):
    print(frame)
print("")

# 2) Longest valid peptide of the first reading frame only.
print("Longest valid peptide sequence:")
first_frame = translate(sequence)
print(get_longest_peptide_sequence(first_frame))
print("")

# 3) Longest valid peptide of each reading frame.
print("Longest valid peptide sequence per frame:")
for longest_peptide_sequence in longest_peptide_sequence_per_frame(sequence):
    print(longest_peptide_sequence)
```
%% Output
Reading frames:
MHG*GGMRRDW
CMVEAACVAI
AWLRRHASRL
PIATHAASTMH
QSRRMPPQPC
NRDACRLNHA
Longest valid peptide sequence:
MRRDW
Longest valid peptide sequence per frame:
MRRDW
MVEAACVAI
MH
MPPQPC
%% Cell type:markdown id: tags:
---
Human Dataset
1.1) Count the number of unique genes in the file "genes_human_genenames_duplicates.txt" and write the unique gene names to a new file.
%% Cell type:code id: tags:
```
from google.colab import drive

drive.mount('/content/drive')

input_path = "/content/drive/MyDrive/ColabNotebooks/UniPythonCourse/Exercises/data/genes_human_genenames_duplicates.txt"
output_path = "/content/drive/MyDrive/ColabNotebooks/UniPythonCourse/Exercises/data/gene_names_unique_human.txt"

with open(input_path, 'r') as genes:
    # Strip the line ending before cutting at the first '.', so that a name
    # with a transcript number ("GENE.1") and the bare name ("GENE") reduce to
    # the same string.
    gene_names = (line.strip().partition('.')[0] for line in genes)
    # dict.fromkeys drops duplicates in a single pass while keeping the order
    # of first appearance; blank lines are skipped.
    unique_genes = list(dict.fromkeys(name for name in gene_names if name))

with open(output_path, 'w') as new_file:
    # One gene name per line. Every name gets its own line ending, because
    # cutting off the transcript number also removes the original one.
    new_file.writelines(gene + "\n" for gene in unique_genes)

# The exercise asks for the number of unique genes as well as the file.
print("Number of unique genes:", len(unique_genes))
```
%% Output
Mounted at /content/drive
0% Loading or .
You are about to add 0 people to the discussion. Proceed with caution.
Finish editing this message first!
Please register or to comment