Procedure
library("stringr")
# Locate every potential start codon ("atg") and stop codon ("taa", "tag",
# "tga") in a DNA sequence.
#
# Arguments:
#   sequence  A single, non-NA character string holding the DNA sequence.
#             Matching is case-insensitive, so "ATG" and "atg" are both found.
#
# Returns:
#   An unnamed list of two parallel vectors sorted by position:
#     [[1]]  integer vector of 1-based start positions of each codon found
#     [[2]]  character vector with the (lowercase) codon found at that position
#   Both vectors are empty when the sequence contains none of the codons.
#
# Errors:
#   Stops if `sequence` is not a single non-NA character string.
findPotentialStartsAndStops <- function(sequence) {
  stopifnot(
    is.character(sequence),
    length(sequence) == 1L,
    !is.na(sequence)
  )

  # Potential start codon first, followed by the three stop codons.
  codons <- c("atg", "taa", "tag", "tga")

  # The codons are lowercase, so normalise the input; lowercasing does not
  # shift any positions.
  sequence <- tolower(sequence)

  # For each codon, collect the start position of every (non-overlapping)
  # occurrence, scanning left to right.
  hits <- lapply(codons, function(codon) {
    starts <- as.integer(gregexpr(codon, sequence, fixed = TRUE)[[1L]])
    # gregexpr() reports "no match" as -1; drop that sentinel.
    starts[starts > 0L]
  })

  positions <- unlist(hits, use.names = FALSE)
  # Label every position with the codon that produced it.
  types <- rep(codons, times = lengths(hits))

  # Sort both vectors in order of position along the input sequence.
  indices <- order(positions)
  list(positions[indices], types[indices])
}
# Example: a short lowercase test sequence containing "atg" at positions 4
# and 16 and "taa" at position 10.
s1 <- "aaaatgcagtaacccatgccc"
# Returns list(c(4, 10, 16), c("atg", "taa", "atg")); auto-printed when run
# at top level.
findPotentialStartsAndStops(s1)
Procedure for using the Simulator
5'AGTTGATGTAGTCTACGTGGACCGACAAGAACAGTTTCGAATCGGAAGCTTGCTTAACGTAGTTC TAACAGTTTTTTATTAGAGAGCAGATCTCTGATGAACAACCAACGGAAAAAGACGGGTCGACCGTGA3'
- The number of start codons and stop codons in the given sequence is displayed as the result. A start codon indicates the site where translation into a protein sequence is initiated, and a stop codon indicates the site where the translation process terminates.
- A default sequence file is provided in the user interface. Users can download the file and use it as the query sequence for finding start and stop codons.