Splitting MiSeq lane

From BioPerl
Jump to: navigation, search

This is a script for splitting a full lane from a multiplexed MiSeq run into individual tags.

#!/usr/bin/env perl
# Author: Lee Katz <lkatz@cdc.gov>
# Enteric Diseases Laboratory Branch, CDC
 
use LKUtils qw/logmsg/;
use strict;
use warnings;
use Data::Dumper;
use Getopt::Long;
 
my $settings={};
exit(main());
 
sub main{
  die usage() if(@ARGV<1);
  GetOptions($settings,qw(infile=s outdir=s));
  my $infile=$$settings{infile} or die "Error: need input Illumina file\n".usage();
  my $outdir=$$settings{outdir} or die "Error: need output directory \n".usage();
 
  splitFastqFile($infile,$outdir,$settings);
 
  return 0;
}
 
sub splitFastqFile{
  my($infile,$outdir,$settings)=@_;
 
  # open files
  mkdir($outdir) or die "Could not mkdir $outdir because $!" if(!-d $outdir);
  my %fh;
  open($fh{$_}, '>', "$outdir/$_.fastq") or die "Cannot open a file in $outdir: $!" for(0..8);
 
  my $currId;
  my $i=0;
  open(IN,'<',$infile) or die "Could not open fastq file $infile because $!";
  while(my $fastqEntry=<IN>){
    $fastqEntry.=<IN> for(1..3); # each entry is 4 lines total
    my($currId,$seq,undef,$qlt)=split(/\n/,$fastqEntry);
    my($part1,$part2)=split(/\s+/,$currId);
    my($instrument,$runId,$flowCellId,$lane,$tile,$x,$y)=split(/:/,$part1);
    my($ReadNum,$FilterFlag,undef,$SampleNumber)=split(/:/,$part2);
    my $fh=$fh{$SampleNumber};
    print $fh "$fastqEntry";
    $i++;
    logmsg "Finished with $i reads" if($i%100000==0);
  }
  close IN;
 
  close($fh{$_}) for(keys(%fh));
}
 
sub usage{
  "Given a fastq file of reads, split the fastq file into files.
  Usage: perl $0 -i illumina.fastq -o outputdir
  -i illumina.fastq
    e.g. from a miseq
  "
}
Personal tools
Namespaces
Variants
Actions
Main Links
documentation
community
development
Toolbox