#!/usr/bin/python
# Generate a shell script (mapping_commands.sh) that maps NMP-seq reads to the
# yeast reference genome and identifies the lesion sites.
from glob import glob
import os 
from os.path import join as jp

# Setup folders and paths variables:
rawFolder = '01-remove-barcode'   # input: barcode-trimmed FASTQ files
bamFolder = '02-align'            # output: alignments, logs, BED/FASTA files
countFolder = '03-count'          # holds MMS_count.py, run as the last step
bowtieIndex = '/Users/pmao/Documents/data/Resources/Yeast/yeast_genome_index/yeast_pUC'
genome = '/Users/pmao/Documents/data/Resources/Yeast/yeast_chromosome_sizes/yeast_genome.txt'
reference_assemble = '/Users/pmao/Documents/data/Resources/Yeast/yeast_genome_seq/yeast.fa'


def ensure_folder(path):
    """Create directory *path* (with parents) unless it already exists.

    Equivalent to ``mkdir -p`` but without spawning a shell; unlike the
    shell call, a failure raises OSError instead of being ignored.
    """
    if not os.path.isdir(path):
        os.makedirs(path)


def sample_name(fastq_path):
    """Return the sample name of a FASTQ path.

    The name is the file name with only its final extension removed, so a
    dotted name such as ``wt.rep1.fastq`` yields ``wt.rep1`` and the
    generated commands still refer to the file that actually exists.
    """
    return os.path.splitext(os.path.basename(fastq_path))[0]


def build_sample_commands(sample):
    """Return the shell commands that process one sample.

    The returned string holds seven commands, one per line, each terminated
    by a newline: echo the file name, align with bowtie2 and sort, index the
    BAM, convert it to BED, take a 1-base flank per read (``-l 1 -r 0 -s``),
    and extract the sequence of that flank twice (as an annotated BED and as
    a FASTA file).

    NOTE: paths are not shell-quoted, so sample names must not contain
    spaces or shell metacharacters.
    """
    prefix = jp(bamFolder, sample)
    fastq = jp(rawFolder, sample + '.fastq')
    log_file = prefix + '_mapping.log'
    sorted_bam = prefix + '.sorted.bam'
    reads_bed = prefix + '.bed'
    mononuc_bed = prefix + '_mononuc.bed'

    commands = [
        # Progress marker printed when the generated script runs.
        ' '.join(['echo', sample + '.fastq']),
        # bowtie2 writes its alignment summary to stderr (kept in the log
        # file) and SAM to stdout, which is converted to BAM and sorted.
        ' '.join(['bowtie2 -x ' + bowtieIndex + ' -U', fastq,
                  '2>', log_file,
                  '| samtools view -bS - | samtools sort - -o', sorted_bam]),
        ' '.join(['samtools index', sorted_bam]),
        ' '.join(['bedtools bamtobed -i', sorted_bam, '>', reads_bed]),
        # One base on the "left" side of each read, interpreted per strand.
        ' '.join(['bedtools flank -i', reads_bed, '-g', genome,
                  '-l 1 -r 0 -s >', mononuc_bed]),
        ' '.join(['bedtools getfasta -fi', reference_assemble,
                  '-bed', mononuc_bed, '-s -bedOut >', prefix + '_bedOut.bed']),
        ' '.join(['bedtools getfasta -fi', reference_assemble,
                  '-bed', mononuc_bed, '-s -fo', prefix + '.fa']),
    ]
    return '\n'.join(commands) + '\n'


def main():
    """Create the output folders and write ``mapping_commands.sh``.

    One block of commands is written per ``*.fastq`` file found in
    ``rawFolder`` (blocks separated by a blank line), followed by the call
    to the counting script.
    """
    ensure_folder(bamFolder)
    ensure_folder(countFolder)

    # ``with`` guarantees the script is flushed and closed even on error.
    with open("mapping_commands.sh", 'w') as mapping:
        # Sorted so the generated script is identical from run to run.
        for r1 in sorted(glob(jp(rawFolder, '*.fastq'))):
            sample = sample_name(r1)
            print(sample)
            mapping.write(build_sample_commands(sample) + '\n')
        cmd2 = ' '.join(['python', countFolder + '/MMS_count.py'])
        mapping.write(cmd2 + '\n\n')


if __name__ == '__main__':
    main()