This program assigns numeric codes to unknown parent groups (UPGs) in a pedigree file in which missing parents are coded with labels containing "GR" and the animal's year of birth is in the 4th column.
# Tally of how often each original "GR" parent label occurs.
count_upg = dict()
# Tally of how often each newly assigned numeric group code occurs.
count_new_upg = dict()
def define_group(yob, *, last_year=2022, first_year=1995, span=5):
    """Return the numeric unknown-parent-group code for a year of birth.

    Years are binned into classes of ``span`` years counted backwards from
    ``last_year``. With the defaults: 2018-2022 -> -2, 2013-2017 -> -3,
    2008-2012 -> -4, 2003-2007 -> -5, 1998-2002 -> -6 and 1997 or earlier
    -> -7.

    Args:
        yob: Year of birth of the animal (integer).
        last_year: Most recent year of birth expected in the data. Update it
            for new files: years after ``last_year`` yield codes of -1 or
            above, which fall outside the intended range of groups.
        first_year: Years before this one are treated as ``first_year``, so
            all older animals share the oldest group.
        span: Number of years per group; must be positive.

    Returns:
        A negative integer group code (for ``yob <= last_year``).

    Raises:
        ValueError: If ``span`` is not positive.
    """
    if span <= 0:
        raise ValueError(f"span must be positive, got {span}")
    # Clamp old animals so they all land in the oldest group.
    yob = max(yob, first_year)
    # Floor division of a negative offset: last_year maps to -1 // span == -1,
    # and the extra -1 shifts the most recent group to code -2.
    return -1 + (yob - last_year - 1) // span
# Recode every parent label containing "GR" (columns 2 and 3) to a numeric
# group derived from the animal's year of birth (column 4), tallying both the
# original labels and the new codes. The `with` block guarantees that both
# files are flushed and closed, even if a malformed line raises.
with open("pedigree.txt", "r") as fin, open("pedigreeUPG.txt", "w") as fout:
    for line in fin:
        row = line.split()
        if not row:
            # Blank line: there are no columns to index, so skip it
            # (it is not copied to the output).
            continue
        for col in (1, 2):
            my_upg = row[col]
            if "GR" not in my_upg:
                continue
            # It is a group: count the original label.
            count_upg[my_upg] = count_upg.get(my_upg, 0) + 1
            # Assign the new group from the year of birth.
            yob = int(row[3])
            new_group = define_group(yob)
            count_new_upg[new_group] = count_new_upg.get(new_group, 0) + 1
            row[col] = new_group
        print(*row, file=fout)

# Report how many times each original label and each new code was seen.
for k, v in count_upg.items():
    print(k, v)
for k, v in count_new_upg.items():
    print(k, v)
This other program reads a pedigree file and stores the animal IDs, then reads a genotype file one line at a time. A genotype line is written to the output file only if its animal is present in the pedigree file.
# Collect the animal IDs (first column) found in the pedigree file.
list_animals = set()
with open("pedigreeUPG.txt", "r") as fin:
    for line in fin:
        row = line.split()
        if not row:
            # Blank line: no ID column to read.
            continue
        anim = row[0]
        list_animals.add(anim)
print("list animals", len(list_animals))

# Copy to the clean file only the genotype lines whose animal is in the
# pedigree; n counts the lines kept. The `with` block closes both files.
n = 0
with open("genotypes.txt", "r") as fin, \
        open("genotypes.txt.clean", "w") as fout:
    for line in fin:
        # Only the first field is needed, so do not split the rest of the line.
        row = line.split(maxsplit=1)
        if not row:
            continue
        anim = row[0]
        if anim in list_animals:
            n += 1
            print(line.strip(), file=fout)
print(n)