# IMPORTANT: run this script from the directory that contains your input file.
# This script, the .fastq files of the run you want to analyse and the WIMP .csv file must all be in the same directory.
# Name of the input CSV file. The script entry point reads it relative to the
# current working directory and derives the temporary and output file names
# from the part of this name before the first ".".
inputfilename="WIMP_inputfile.csv" #change the inputfile here
from multiprocessing import Pool
import concurrent.futures #imports the multithreading library
# Define a function for the thread
def search_fasta(WIMP_inputline):
    """Find the read named in one WIMP CSV line and append its length.

    The FASTQ file name is the text before the first "-" of the line plus
    ".fastq", resolved relative to the current working directory. The read ID
    is the second comma-separated field of the line. The FASTQ file is scanned
    record by record (four lines per record) and the scan stops at the first
    record whose header ID equals the read ID.

    Args:
        WIMP_inputline: One line of the WIMP CSV file, with or without a
            trailing newline.

    Returns:
        The input line (trailing whitespace removed) followed by a comma,
        the length of the matching read's sequence, and a newline.

    Raises:
        IndexError: If the line has no second comma-separated field.
        OSError: If the FASTQ file cannot be opened.
        LookupError: If no record in the FASTQ file has the requested read ID.
    """
    WIMP_inputline = WIMP_inputline.rstrip()
    fastqfilename = WIMP_inputline.split("-", 2)[0] + ".fastq"
    readID = WIMP_inputline.split(",", 3)[1]

    readlenght = None
    # Stream the file instead of loading it completely: only one record is
    # needed, and the handle is closed even if an error occurs.
    with open(fastqfilename, 'r') as fqfile:
        while True:
            header = fqfile.readline()
            if not header:
                break  # end of file, read not found
            sequence = fqfile.readline()
            header_fields = header.split()
            # Characters 1..36 of the first header token are compared, i.e.
            # the leading "@" is skipped (presumably a 36-character read
            # UUID follows -- confirm against your FASTQ headers).
            if header_fields and header_fields[0][1:37] == readID:
                # Strip the line terminator so it is not counted as a base.
                # If you want the sequence instead of the length, remove the
                # len() function.
                readlenght = len(sequence.rstrip("\r\n"))
                break
            # Skip the "+" separator line and the quality line of this record.
            fqfile.readline()
            fqfile.readline()

    if readlenght is None:
        raise LookupError(
            "read " + readID + " not found in " + fastqfilename
        )
    return WIMP_inputline + "," + str(readlenght) + "\n"
if __name__ == "__main__":
    # Script entry point: split the input CSV into temporary chunk files,
    # run search_fasta over every line of each chunk in a process pool,
    # append the returned lines to the output CSV and remove the chunk files.

    # Imported locally so this block is self-contained; os is required for
    # the path and directory handling below.
    import os

    # NOTE(review): the number of lines per temporary file is not recoverable
    # from the damaged source; 1000 is a placeholder -- confirm or adjust.
    lines_per_tmpfile = 1000

    dirname = os.path.join("C:/WIMPlenght_tmp")
    # The temporary directory must exist before chunk files can be written.
    os.makedirs(dirname, exist_ok=True)
    filename = inputfilename.split(".")[0]
    with open(inputfilename, 'r') as infile:
        file_lines = infile.readlines()

    # Step 1: write consecutive slices of the input to numbered tmp files.
    filelinecounter = 0
    filecounter = 0
    while filelinecounter < len(file_lines):
        throughputfilename = filename + "_" + str(filecounter).zfill(6) + ".csv"
        print(throughputfilename)
        chunk = file_lines[filelinecounter:filelinecounter + lines_per_tmpfile]
        # One handle per chunk, closed by the context manager, instead of
        # opening a new append handle for every single line.
        with open(dirname + "/" + throughputfilename, 'w') as outfile:
            outfile.writelines(chunk)
        filelinecounter = filelinecounter + len(chunk)
        filecounter = filecounter + 1
        print("Filenumber: ", filecounter)
    print("tmpfiles complete")

    # Step 2: process each tmp file and append the results to the output.
    outputfilename = inputfilename.split(".")[0] + "_Output_WIMP&Seqlenght.csv"
    # NOTE(review): the original pool size is unknown; Pool() uses the
    # default number of worker processes.
    with Pool() as p, open(outputfilename, 'a') as outfile:
        for i in range(filecounter):
            tmpfilename = filename + "_" + str(i).zfill(6) + ".csv"
            tmppath = os.path.join(dirname, tmpfilename)
            # Read the tmp file into a list with each line as one item.
            with open(tmppath, 'r') as tmpfile:
                WIMP_lines = tmpfile.readlines()
            result = p.map(search_fasta, WIMP_lines)
            outfile.writelines(result)
            # Only files created by this run are removed.
            os.remove(tmppath)

    # Step 3: remove the tmp directory, but only if it is empty.
    try:
        os.rmdir(dirname)
    except OSError:
        # The directory still holds files that this run did not create;
        # leave it in place rather than deleting unrelated data.
        pass
    print("tmpfiles deleted")