import sys
import numpy as np
f=open(sys.argv[2],"r")
f2=open(sys.argv[1],"r")
f1=open("out.txt","w")
import math
# 隐状态
def compute(obs, states, start_p, trans_p, emit_p):
max_p = np.zeros((len(obs), len(states)))
path = np.zeros((len(states), len(obs)))
for i in range(len(states)):
max_p[0][i] = math.log(start_p[i]) + emit_p[i][obs[0]]
path[i][0] = i
for t in range(1, len(obs)):
#print(t)
newpath = np.zeros((len(states), len(obs)))
for y in range(len(states)):
prob = -100000000
for y0 in range(len(states)):
nprob = max_p[t-1][y0] + trans_p[y0][y] + emit_p[y][obs[t]]
if nprob > prob:
prob = nprob
state = y0
max_p[t][y] = prob
for m in range(t):
newpath[y][m] = path[state][m]
newpath[y][t] = y
path = newpath
max_prob = -100000000
path_state = 0
for y in range(len(states)):
if max_p[len(obs)-1][y] > max_prob:
max_prob = max_p[len(obs)-1][y]
path_state = y
#print(max_prob)
return path[path_state]
state_s = []
obser=[]#fasta file
alp=[]#alphabet
#get hmm file
lines=f2.readlines()
wordlist=lines[0].split()#将每一行的数字分开放在列表中
#for a in wordlist:#遍历每一行的数字
#print (a)
statenum=int(wordlist[0])
hidden_state = []
for k in range(statenum):
state_s.append(k)
hidden_state.append(chr(ord(‘A‘)+k))
letternum=wordlist[1]
#print((wordlist[2]))
for i in range(len(wordlist[2])):
alp.append((wordlist[2][i]).lower())
#print(alp[i])
wordlist=lines[1].split()#将每一行的数字分开放在列表中
start_probability =[]
for a in wordlist:#遍历每一行的数字
#print (a)
start_probability.append(float(a))
#print(statenum)
#print(letternum)
transititon_probability=np.zeros((int(statenum),int(statenum)))
emission_probability=np.zeros((int(statenum),int(letternum)))
#print(emission_probability[2][2])
for nn in range(int(statenum)):
wordlist=lines[nn+2].split()#将每一行的数字分开放在列表中
#print(len(wordlist))
for k in range(int(statenum)):
transititon_probability[nn][k]=math.log(float(wordlist[k]))
#print(transititon_probability[nn][k])
for k in range(int(letternum)):
emission_probability[nn][k]=math.log(float(wordlist[k+statenum]))
#print("emi")
#print(emission_probability[nn][k])
#print("read%d"%(nn))
for line in f:#get fasta file
for a in line:
if(a==">"):
break
for k in range(len(alp)):
if(a.lower()==alp[k]):
obser.append(k)
#transititon_probability = np.array([[math.log(0.999), math.log(0.001)], [math.log(0.01), math.log(0.99)]])#transition table
#emission_probability = np.array([[math.log(0.35), math.log(0.15), math.log(0.15),math.log(0.35)], [math.log(0.15), math.log(0.35), math.log(0.35),math.log(0.15)]])#emission table
result = compute(obser, state_s, start_probability, transititon_probability, emission_probability)
coun=0
sta=-1
pos=1
#print(result)
for k in range(len(result)):
if (sta != -1 and sta != int(result[k])):
print("%7d%7d state %s" % (pos, k, hidden_state[sta]))
f1.write("%7d%7d state %s\n" % (pos, k, hidden_state[sta]))
if (sta == 1):
coun += 1
pos = k+1
sta = int(result[k])
print("%7d%7d state %s" % (pos, k+1, hidden_state[sta]))
f1.write("%7d%7d state %s\n" % (pos, k+1, hidden_state[sta]))
原文:https://www.cnblogs.com/hyffff/p/12643543.html