Splitting MiSeq lane
From BioPerl
This is a script for splitting a full lane from a multiplexed MiSeq run into individual tags.
#!/usr/bin/env perl # Author: Lee Katz <lkatz@cdc.gov> # Enteric Diseases Laboratory Branch, CDC use LKUtils qw/logmsg/; use strict; use warnings; use Data::Dumper; use Getopt::Long; my $settings={}; exit(main()); sub main{ die usage() if(@ARGV<1); GetOptions($settings,qw(infile=s outdir=s)); my $infile=$$settings{infile} or die "Error: need input Illumina file\n".usage(); my $outdir=$$settings{outdir} or die "Error: need output directory \n".usage(); splitFastqFile($infile,$outdir,$settings); return 0; } sub splitFastqFile{ my($infile,$outdir,$settings)=@_; # open files mkdir($outdir) or die "Could not mkdir $outdir because $!" if(!-d $outdir); my %fh; open($fh{$_}, '>', "$outdir/$_.fastq") or die "Cannot open a file in $outdir: $!" for(0..8); my $currId; my $i=0; open(IN,'<',$infile) or die "Could not open fastq file $infile because $!"; while(my $fastqEntry=<IN>){ $fastqEntry.=<IN> for(1..3); # each entry is 4 lines total my($currId,$seq,undef,$qlt)=split(/\n/,$fastqEntry); my($part1,$part2)=split(/\s+/,$currId); my($instrument,$runId,$flowCellId,$lane,$tile,$x,$y)=split(/:/,$part1); my($ReadNum,$FilterFlag,undef,$SampleNumber)=split(/:/,$part2); my $fh=$fh{$SampleNumber}; print $fh "$fastqEntry"; $i++; logmsg "Finished with $i reads" if($i%100000==0); } close IN; close($fh{$_}) for(keys(%fh)); } sub usage{ "Given a fastq file of reads, split the fastq file into files. Usage: perl $0 -i illumina.fastq -o outputdir -i illumina.fastq e.g. from a miseq " }