Begin
- 使用Python处理DNA序列,本质上就是IO操作和字符串处理。
- 代码使用菜单和用户进行交互
获取DNA序列
在https://www.ncbi.nlm.nih.gov/可以下载
处理操作
生成DNA互补序列
本质上是CG, AT的互换
生成DNA反向序列
reverse字符串即可
生成DNA反向互补序列
在互补的基础上reverse
检测GC含量
GC字符占字符串长度的比例
DNA转录为RNA
T换为U即可
运行
可测试使用的DNA序列
GAGTAGTCCCTTCGCAAGCCCTCATTTCACCAGGCCCCCGGCTTGGGGCGCCTTCCTTCCCCATGGCGGG
ACACCTGGCTTCGGATTTCGCCTTCTCGCCCCCTCCAGGTGGTGGAGGTGATGGGCCAGGGGGGCCGGAG
CCGGGCTGGGTTGATCCTCGGACCTGGCTAAGCTTCCAAGGCCCTCCTGGAGGGCCAGGAATCGGGCCGG
GGGTTGGGCCAGGCTCTGAGGTGTGGGGGATTCCCCCATGCCCCCCGCCGTATGAGTTCTGTGGGGGGAT
GGCGTACTGTGGGCCCCAGGTTGGAGTGGGGCTAGTGCCCCAAGGCGGCTTGGAGACCTCTCAGCCTGAG
GGCGAAGCAGGAGTCGGGGTGGAGAGCAACTCCGATGGGGCCTCCCCGGAGCCCTGCACCGTCACCCCTG
GTGCCGTGAAGCTGGAGAAGGAGAAGCTGGAGCAAAACCCGGAGGAGGCAAGTGAGCTTCGACGGGGTTG
GGGTGTGGGGAGGTGGTCATGACAGGGCAGCCTGATGGGGAAGTGGTCACCTGCAGCTGCCCAGACCTGG
CACCCAGGAGAGGAGCAGGCAGGGTCAGCTGCCCTGGCCAGGGAGGGGTGTGTATCAACTGCTGGCAGCC
CTGGCAGGCAGGGGCCAGGTGGGAAGTGGAAGCTGGATTTCGAAGAGACAACTGCCGGTGAGGGCAGAGC
AGCCTGGGAGAGTCGGAAGCTGGCCCAGGCTGGCCTTTGCTCTGGCCCAGCCCTTGTCAGGGTCTCTCAC
ATCTCCTAGGCCTGCCCAGGGTCTGGTCACTCATTACTGGCCCAGCACCAGACCCAGCTTGGGGTTGGTT
TGAGCCCCTTTTCCCACCCTTAGTCCTGCTTGAAAATTTGACCCTTATCAGACCCAAGATTTTGGCCTTA
GGGTTAAGCATAGCCTGAGGGTAAAAACAGTGCTCATTCCAGGATTATTGTTCCTGAAAGTCTAGGGTGT
GACTCGTTTCTGATAGGATCTCCTGTTTGGGCTGTGTGTGTGCGCGTTGTGAGCTGGGTTTACCTCCAGT
CAAGTATAGGGCTTGTCTTCCCCGGATCTCTGCCTCAGGCCAATGACTGGCCACTGTGTTAAGGTGCACA
CCCTGGCACCCCTTGTAGAAAGCTGGATTTTGATTGACTTCAGCCTCAGTTCCAAAGTTGTAAACAAGAA
AAATGGTGAGAGATTTCTCCAGGCCATTTGCAAATATAGAGCTGCTGCGGGATTGAAGGCATCCAGCCCT
GCTGAGGACTATTAAAGATGTATCTTCCAGTCCTTCAAGGCGACAAGTGTAAGCAATTAGAGATTAAATA
CTAAGCCTTGAGACCTCACAGAAAGGTGTGACTGGTTTCTGGAGTGACCGAGAAGCCCCAACCTCTTCGC
AGGAGGTCACTGCTGAGCCTTGAATGATAATGGCTGGCAATTGTGGTCCACTTCCTAAGTGCCTGGCTGT
GTGCTCCGTTTATACATCATTATCTCATTAACCAGCACAAAATCTCCTAGGGGGAGGTATTATTATCCTA
TTTAACGGGTTTTAACTGCTAAATGATGAAGCGAGGATTTGGACCAGTGTTTATTCCAAAACCCCAAAAC
AGAATTTGGAAAATCCAAGATAGCAGAGGGCATTTATCAGTTTGAGTTATTGGCTGAGCAGAAGTTGGGG
ATGAAAACAGCCTATTTGAAATTGATATGATCAAGCACCATTGAAACACTTCCTTGAGGCTTCAGAACTA
CAAAAAGGCCTTGTTTTTTTCTCACTAGCTGTGCACCTCTGTCTGCCGGCAGCCTCATATGGCATGCCCC
AGGGCTCAGTCCTTCAACCTCTGCTCTATCTACCCTTCCTTCCTCTCACCCACCCTCAAGGCTTAAATGC
CATTTAGACACCAGATGACTACCGCGTTTTCTGTCTCTTGTGATGGCTCCCTGAACTGCTCCACCCTGAT
CACCCAGTTGCTCAAGGCCAAACCCAGTCATCCTCAGTTTCTTTCATGTCCTACATCCTATCCTTAAGAA
ACATCCTGAATCAATCACAACCTAACCCTGGCCTCAGCCACCATCATCTCTGCTGGGATTACCGCAGTAG
CTTCTCAAATTATACTGCTTCCTCCCTACTGTCTGTGGCCAACACGTCAACTAGAGTCAGTGTTTTAAAA
GGTGTGGCCAGGCACTTTGGGAGGCCGAGGCAGGCGAATCACCTGAGGTCGGGAGTTCGAGTCCAGCCTG
ACCAACATGGCGAAACCCCATCTCTACTAAAAATACAAAATTAGCTGGGCGTGGTGACGCATGCCTGTAA
TCTCAGCTACTCAGGAAGCTGAGGCAGGAGAATCGCTTGAACCTGGGAGATGGAGGTTGCGGTGAGCCGA
GATCGCGCCAGTGCACTCCAGCCTAGGCAACAAAAGCGAAACTCTCAAAAAAAAAAAAAAAAAAGGTGAG
GCTAGGTGCGGTGGTTCACACCTGTAATCCCAGCACTTTGGGAGGCCAAGGTGGACAGATCACTTGAGGT
CTCCTGACCAGCCTGGCCAACATGGTGAAACCCCATATCTACTAAAAATACAAAAATTAGCCGGGCATGG
TGGTGGGTGCCTGTAGTCCCAGCTACTCAGGCGGCTGAGGCAGAATAGCTTGAACCCAGAAGGCGGAGAT
TGCAGTGAGCCAAGATCACGCCACAGCACTCCAGCCTGGGCGATAGAACGAGATTCCGTCTTGGTGGGGA
GAAAAAGGGTGAGAGATCATTTCGCTTGGACTAAAACAAAGTCACTATGTCTGCAACAGGATCTACCTAG
CCACCAGACCAGCTTTGGGCTCTGGAAGGCCCACTTCAGGGCCTTGCCACATTAGACTCTTGTCCTTTGC
TCAAACAATCACCTTCTCTGTCTTTAAAAGTGTCACCCTCCTCCATAATCTCCTTCCCTCCTTTACCCTA
CTCCTATAGACTGCTTTATTTTTTTTTTAATTTTTGAGATGGAGTCTCACTCTGTCCCTCAGGCTGGAGT
GCAGTGGTGCGATCTTGGCTCACTGCAAACTCCACCTCCTAGGTTCAAGCAATTCTCCTGCCTCAGCCTC
CTGAGTAGCTGGGATTATAGGGGAGCGCCATGATGCCCAGCTAATTTTTGTATTTTTAGTAGAGACAGAG
TCTCACTATGTTGACCAGGCTAGTCTTGAACTCCTGACCTCAAGTGATCTACCCACCTTGGCCTCCCAAA
GTGAAGGGATTACAGGCATGACCACTGCGCCCAGACTGCTTTACTTTTTTCCATAATATATATATATATT
TTAAATAGAGGCAGCAGGGGTGGGAGAAGGGGCGGCACGGGTCTCACTATGTTACCCAGGCTGCTTTCTA
ACTCTTGGGCTCAAGCAGTCTGCCCACCTTGGCCTCCCAAAGTGCTAGGATTTACAGACATGAGCCACTG
TGCCTGGCCATTTTTTATTTTATTTACTTTTTTATTTTTCAGAGCAGGAGTGGAAGTTTATTATTAAAAA
GTTATAGGGCAGGGAAAAAAGGAAAGTGCACTTGGAAGAGATCCAAGTGGGCAACTTGAAGAACAAGTGC
CAAATAGCACTTCTGTCATGCTGGATGTCAGGGCTCTTTGTCCACTTTGTATAGCCGCTGGCTTATAGAA
GGTGCTCGATAAATCTCTTGAATTTAAAAATCAATTAGGATGCCTCTATAGTGAAAAAGATACAGTAAAG
ATGAGGGATAATCAATTTAAAAAATGAGTAAGTACACACAAAGCACTTTATCCATTCTTATGACACCTGT
TACTTTTTTGCTGTGTTTGTGTGTATGCATGCCATGTTATAGTTTGTGGGACCCTCAAAGCAAGCTGGGG
AGAGTATATACTGAATTTAGCTTCTGAGACATGATGCTCTTCCTTTTTAATTAACCCAGAACTTAGCAGC
TTATCTATTTCTCTAATCTCAAAACATCCTTAAACTGGGGGTGATACTTGAGTGAGAGAATTTTGCAGGT
ATTAAATGAACTATCTTCTTTTTTTTTTTTCTTTGAGACAGAGTCTTGCTCTGTCACCCAGGCTGGAGTG
CAGTGGCGTGATCTCAGCTCACTGCAACCTCCGCCTCCCGGGTTCAAGTGATTCTCCTGCCTCAGCCTCC
TGAGTAGCTGGGATTACAGGTGCGTGCCACCGTGCCCAGCTAATTTTTGTGTTTTTAGTAGAGACGGGGT
TTCACCATGTTGGCCATGCTGGTCTTGAACTCCTGACCTCGTGATCTGCCCACCTCGGCCTCCCAAAGTG
CTGGAATTATAGGCGTGAGCCACCGCGCCCAGCAAAGAACTTCTAACCTTCATAACCTGACAGGTGTTCT
CGAGGCCAGGGTCTCTCTTTCTGTCCTTTCACGATGCTCTGCATCCCTTGGATGTGCCAGTTTCTGGGGG
AAGAGTAGTCCTTTGTTACATGCATGAGTCAGTGAACAGGGAATGGGTGAATGACATTTGTGGGTAGGTT
ATTTCTAGAAGTTAGGTGGGCAGCTTGGAAGGCAGAGGCACTTCTACAGACTATTCCTTGGGGCCACACG
TAGGTTCTTGAATCCCGAATGGAAAGGGGAGATTGATAACTGGTGTGTTTATGTTCTTACAAGTCTTCTG
CCTTTTAAAATCCAGTCCCAGGACATCAAAGCTCTGCAGAAAGAACTCGAGCAATTTGCCAAGCTCCTGA
AGCAGAAGAGGATCACCCTGGGATATACACAGGCCGATGTGGGGCTCACCCTGGGGGTTCTATTTGGTGG
GTTCCCCTCTGCAGATTCTGACCGCATCTCCCCTCTAAGGAGTATCCCTGAACCTAGTGGGGAGGGGCAG
GGGCAGACTACCCTCACCCATGAAGAGGAGTAGGGAGAGGGAGAAGATGCTTTGAGCTCCCTCTGGGAAG
AGGTGGTAAGCTTGGATCTCAGGGTCACAAGGGCCCTGCGTGCTCCCTCACTTTGCTTCTCTTTTGACTG
GCCTCCCCCAGGGAAGGTATTCAGCCAAACGACCATCTGCCGCTTTGAGGCTCTGCAGCTTAGCTTCAAG
AACATGTGTAAGCTGCGGCCCTTGCTGCAGAAGTGGGTGGAGGAAGCTGACAACAATGAAAATCTTCAGG
AGGTAAGGGTGGGAGGGGGATACCCGGGGACCTTCCCTTTCTTGGCCTAATTTCCATTGCTTCCATCACT
GGCTCGTAGCTCTCCGTCTTTGGTGCAGTGGTTCTCAGTGGGATGGAGTGAAATTCCTCAGTTCTGCTGG
GATAAGGTCCAGAGCCAACCCTTCCAGGATCCTGCCTTTTCACACCACCACCTGGCTCTGCTGACACATC
TAGTCACAGACCCCTGTGATGCTGTTACTCAGCAAGTCCAAAGCTTGCCCTTGTCACCCCCTTCCCACCT
GCACAGATATGCAAAGCAGAAACCCTCGTGCAGGCCCGAAAGAGAAAGCGAACCAGTATCGAGAACCGAG
TGAGAGGCAACCTGGAGAATTTGTTCCTGCAGTGCCCGAAACCCACACTGCAGCAGATCAGCCACATCGC
CCAGCAGCTTGGGCTCGAGAAGGATGTGAGTGCCATGTCTCTCTGCGGGCTCCATCTCTTTCCCCTGTCA
CCACCTCGCTTTCCCTAGCTCTGGCTCCTCCAACTGCTCTAGGGCTGTTGGCTTTGGACAGAATGTCCAA
GCAGTCAGGCCTGTCTCAGCTCATTCTCTAATGTCCTCCTCTAACTGCTCTAGGGCTGTTGGCTTTGGAT
AGAATGTCCAAGCAGAGTCAGGCCCGTCTCAGCTCATTGTCTAATGTCATTCTCCTTTCTGTCATTCACT
TGCAGGTGGTCCGAGTGTGGTTCTGTAACCGGCGCCAGAAGGGCAAGCGATCAAGCAGCGACTATGCACA
ACGAGAGGATTTTGAGGCTGCTGGGTCTCCTTTCTCAGGGGGACCAGTGTCCTTTCCTCTGGCCCCAGGG
CCCCATTTTGGTACCCCAGGCTATGGGAGCCCTCACTTCACTGCACTGTACTCCTCGGTCCCTTTCCCTG
AGGGGGAAGCCTTTCCCCCTGTCTCCGTCACCACTCTGGGCTCTCCCATGCATTCAAACTGAGGTGCCTG
CCCTTCTAGGAATGGGGGACAGGGGGAGGGGAGGAGCTAGGGAAAGAAAACCTGGAGTTTGTGCCAGGGT
TTTTGGGATTAAGTTCTTCATTCACTAAGGAAGGAATTGGGAACACAAAGGGTGGGGGCAGGGGAGTTTG
GGGCAACTGGTTGGAGGGAAGGTGAAGTTCAATGATGCTCTTGATTTTAATCCCACATCATGTATCACTT
TTTTCTTAAATAAAGAAGCCTGGGACACAGTAGATAGACACACTTA
代码
import sys
import os
# Currently loaded DNA sequence, shared by every menu action through `global DNA`.
# An empty string (or the placeholder "null") is treated as "nothing loaded".
DNA = ""
class toDO():
    """Menu actions that operate on the module-level ``DNA`` string.

    The methods never read state from ``self``; the menu invokes them through
    the class (``toDO.compDNA(obj)``) with an arbitrary object as ``self``.
    Internal calls therefore also go through ``toDO.`` instead of ``self.``.
    """

    # Every file an action may create; used by cleanFiles.
    _GENERATED_FILES = (
        "compDNA.txt",
        "recDNA.txt",
        "rev_compDNA.txt",
        "toRNA.txt",
        "upperDNA.txt",
    )

    # Base-pair swap table (C<->G, A<->T, case preserved). Characters that are
    # not in the table (newlines, 'N', '\r', ...) pass through unchanged.
    _COMPLEMENT = str.maketrans("CGTAcgta", "GCATgcat")

    @staticmethod
    def _banner(message):
        """Print ``message`` framed by two separator lines."""
        print("========================================")
        print(message)
        print("========================================")

    @staticmethod
    def _export(text, exportFile, lead="生成的序列已保存至"):
        """Write ``text`` to ``exportFile`` and tell the user where it went."""
        # The context manager guarantees the data is flushed and the handle closed.
        with open(exportFile, "w") as fp:
            fp.write(text)
        toDO._banner(lead + exportFile + "请查看~")

    def isExistDNA(self):
        """Return True when a sequence is loaded, else print a hint and return False.

        An empty or whitespace-only string and the legacy placeholder
        ``"null"`` all count as "not loaded".
        """
        if DNA == "null" or not DNA.strip():
            toDO._banner("请先导入DNA再执行操作!")
            return False
        return True

    def toUpper(self):
        """Menu option 7: upper-case the loaded sequence in place and save it to upperDNA.txt."""
        global DNA
        if not toDO.isExistDNA(self):
            return
        DNA = DNA.upper()
        toDO._export(DNA, "upperDNA.txt", lead="转换为大写的序列保存在")

    def cleanFiles(self):
        """Menu option 6: delete every output file this program may have written.

        Each file is removed independently so one failure does not leave the
        remaining files behind; the success banner is printed only when no
        removal failed.
        """
        failed = False
        for name in toDO._GENERATED_FILES:
            try:
                os.remove(name)
            except FileNotFoundError:
                # Nothing to clean for this action.
                continue
            except OSError as e:
                print(e)
                failed = True
        if not failed:
            toDO._banner("已删除程序生成的所有文件!")

    def selectDNA(self):
        """Menu option 0: prompt for a file name until it names a file, then load it into ``DNA``.

        The previously loaded sequence is kept until the new file has been
        read, so an interrupted prompt does not discard it.
        """
        global DNA
        # Loop instead of recursing: repeated typos must not grow the call stack.
        while True:
            fileName = input("请输入要读取的文件名(确保在当前目录下):")
            # isfile (not exists) so a directory name is rejected instead of crashing open().
            if os.path.isfile(fileName):
                break
            print("请输入正确的文件名!")
        with open(fileName) as f:
            DNA = f.read()
        toDO._banner("导入成功!请进行后续操作~")

    def compDNA(self):
        """Menu option 1: write the complementary strand to compDNA.txt."""
        if not toDO.isExistDNA(self):
            return
        toDO._export(DNA.translate(toDO._COMPLEMENT), "compDNA.txt")

    def revDNA(self):
        """Menu option 2: write the reversed sequence to recDNA.txt."""
        if not toDO.isExistDNA(self):
            return
        toDO._export(DNA[::-1], "recDNA.txt")

    def rev_compDNA(self):
        """Menu option 3: write the reverse complement to rev_compDNA.txt."""
        if not toDO.isExistDNA(self):
            return
        toDO._export(DNA[::-1].translate(toDO._COMPLEMENT), "rev_compDNA.txt")

    def calGC(self):
        """Menu option 4: print the GC fraction of the loaded sequence.

        Whitespace (e.g. the line breaks of a multi-line sequence file) is
        excluded from the length so it does not dilute the ratio.

        Returns:
            The GC fraction as a float, or None when no sequence is loaded.
        """
        if not toDO.isExistDNA(self):
            return None
        bases = "".join(DNA.split())
        gc = sum(bases.count(base) for base in "GCgc")
        # isExistDNA guarantees at least one non-whitespace character; the
        # guard only protects against a future change to that check.
        result = gc / len(bases) if bases else 0.0
        toDO._banner("当前DNA的GC含量为:" + str(result))
        return result

    def toRNA(self):
        """Menu option 5: transcribe DNA to RNA (T -> U, case preserved) into toRNA.txt."""
        if not toDO.isExistDNA(self):
            return
        rna = DNA.replace("T", "U").replace("t", "u")
        toDO._export(rna, "toRNA.txt")

    def quit(self):
        """Menu option 8: print a farewell and exit the process with status 0."""
        print("\n欢迎再次使用~\n")
        sys.exit(0)
class ProgramMenu():
    """Interactive text menu that dispatches numbered choices to ``toDO`` actions."""

    # Menu number -> name of the toDO method implementing it. Names are
    # resolved at call time with getattr, and the menu instance is passed
    # through as the method's ``self`` argument.
    _ACTIONS = {
        0: "selectDNA",
        1: "compDNA",
        2: "revDNA",
        3: "rev_compDNA",
        4: "calGC",
        5: "toRNA",
        6: "cleanFiles",
        7: "toUpper",
        8: "quit",
    }

    def displayMenu(self):
        """Print the list of available options."""
        # Plain text is printed rather than ANSI color escapes
        # (e.g. "\033[1;32;40m" ... "\033[0m") so the menu also renders on
        # terminals without color support.
        print("""
========================================
请选择您要进行的DNA序列处理操作:
0. 导入需要处理的DNA序列(请先选择此选项)
1. 生成DNA互补序列
2. 生成DNA反向序列
3. 生成DNA反向互补序列
4. 检测GC含量
5. DNA转录为RNA
6. 删除程序产生的文件
7. 将DNA全部转为大写并保存
8. 退出程序
========================================
""")

    def begin(self):
        """Run the main loop: show the menu, read a choice, run the matching action.

        Input that is not an integer, or an integer that is not on the menu,
        prints a hint and shows the menu again. A closed input stream (EOF) is
        treated as a request to quit instead of retrying forever.
        """
        while True:
            self.displayMenu()
            try:
                num = int(input("输入选项:"))
            except ValueError:
                print("请输入正确的选项!")
                continue
            except EOFError:
                # No more input can ever arrive; leave through the normal exit path.
                toDO.quit(self)
                return
            action = self._ACTIONS.get(num)
            if action is None:
                print("请输入正确的选项!")
                continue
            getattr(toDO, action)(self)
# Script entry point: start the interactive menu only when run directly.
if __name__ == "__main__":
    menu = ProgramMenu()
    menu.begin()
参考链接:
- https://blog.csdn.net/u012904337/article/details/79504319
- https://blog.csdn.net/u011262253/article/details/88542804
版权属于:moluuser
本文链接:https://archive.moluuser.com/archives/52/
本作品采用知识共享署名-非商业性使用-相同方式共享 4.0 国际许可协议进行许可。