Skip to content
Laurent Jourdren edited this page Mar 26, 2015 · 2 revisions

Writing a wrapper on a mapper.

WARNING: This documentation is outdated and will soon be updated.

Eoulsan defines a SequenceReadsMapper interface that allows a mapper to be integrated into the mapreads step.

Available SequenceReadsMapper implementations

The following SequenceReadsMapper implementations are available:

  • BowtieReadsMapper runs Bowtie, version 0.12.7
  • BWAReadsMapper runs BWA, version 0.5.8c
  • SOAPReadsMapper runs SOAP, version 2.20
  • GSNAPReadsMapper runs GSNAP, version 2012-07-12, with the index created by the version 2012-07-12 of GMAP.

Writing a plug-in for the mapreads step of Eoulsan

It is very easy to write a plug-in for the mapreads step of Eoulsan. In this section we will write a MyMapperReadsMapper class that installs the MyMapper program, creates its index and runs it. First, we assume that this mapper uses GMAP to generate the index of the reference genome (this index generator is already integrated into Eoulsan). Then, we will explain how to write an index generator wrapper for Eoulsan.

The wrapper on the mapper

  • First, we have to initialize the mapper by adding the init() method to the new class:

package com.example;

// the AbstractSequenceReadsMapper class implements the SequenceReadsMapper interface
public class MyMapperReadsMapper extends AbstractSequenceReadsMapper {

  // Default command-line arguments of MyMapper (empty here: no extra argument).
  // init() stores this value through setMapperArguments().
  public static final String DEFAULT_ARGUMENTS = "";

  /**
   * Initializes the mapper: delegates to the parent implementation, then
   * registers the default arguments of MyMapper.
   *
   * @param pairedEnd forwarded unchanged to the parent implementation
   * @param fastqFormat forwarded unchanged to the parent implementation
   * @param incrementer forwarded unchanged to the parent implementation
   * @param counterGroup forwarded unchanged to the parent implementation
   */
  @Override
  public void init(final boolean pairedEnd, final FastqFormat fastqFormat,
      final ReporterIncrementer incrementer, final String counterGroup) {

    // call to the super implementation of this method
    super.init(pairedEnd, fastqFormat, incrementer, counterGroup);
    // the default arguments are stored in the super object
    setMapperArguments(DEFAULT_ARGUMENTS);
  }

  • Then, add getMapperName(), getMapperVersion() and isSplitsAllowed() methods:

  // name returned by getMapperName() and used in the error messages of internalMap()
  private static final String MAPPER_NAME = "MyMapper";
  // object used as the monitor of the synchronized blocks that surround the calls to install()
  private static final String SYNC = MyMapperReadsMapper.class.getName();

  // name of the executable stored in the src/main/java/files/linux/amd64 source folder
  private static final String MAPPER_EXECUTABLE = "mymapper";

  /**
   * Returns the name of the mapper.
   *
   * @return the value of the MAPPER_NAME constant
   */
  @Override
  public String getMapperName() {

    return MAPPER_NAME;
  }

  /**
   * Returns the version of the mapper, as printed by the executable when it
   * is run with the "--version" option.
   *
   * @return the raw output of the "--version" command
   */
  @Override
  public String getMapperVersion() {

    // install the executable if necessary; the installation is guarded by the
    // SYNC monitor
    final String executablePath;
    synchronized (SYNC) {
      executablePath = install(MAPPER_EXECUTABLE);
    }

    // ask the program for its version and return its output unchanged
    return ProcessUtils.execToString(executablePath + " --version");
  }

  /**
   * Tells whether the mapper accepts split input.
   *
   * @return always true in this example
   */
  @Override
  public boolean isSplitsAllowed() {
    // consider that MyMapper can be run in the distributed mode
    return true;
  }

}

  • Now, add getArchiveFormat(), getIndexerCommand() and getIndexerExecutable() methods in relation to the generation of the index:

  // name of the index generator executable (returned by getIndexerExecutable()),
  // bundled in the src/main/java/files/linux/amd64 source folder
  private static final String INDEXER_EXECUTABLE = "gmap_build_2012-07-12";

  /**
   * Returns the data format of the index archive.
   *
   * @return DataFormats.GMAP_INDEX_ZIP, as this example reuses the GMAP index
   */
  @Override
  public DataFormat getArchiveFormat() {
    return DataFormats.GMAP_INDEX_ZIP;
  }

  /**
   * Returns the name of the index generator executable.
   *
   * @return the value of the INDEXER_EXECUTABLE constant
   */
  @Override
  protected String getIndexerExecutable() {
    return INDEXER_EXECUTABLE;
  }

  /**
   * Builds the command line used to compute the GMAP index.
   *
   * @param indexerPathname path of the index generator executable
   * @param genomePathname path of the genome file
   * @return the command line as a single string
   */
  @Override
  protected String getIndexerCommand(String indexerPathname, String genomePathname) {

    // the "-B" and "-D" options receive the directories that contain the
    // indexer executable and the genome file
    final File indexerDir = new File(indexerPathname).getParentFile();
    final File genomeDir = new File(genomePathname).getParentFile();

    final StringBuilder command = new StringBuilder();
    command.append(indexerPathname);
    command.append(" -B ").append(indexerDir.getAbsolutePath());
    command.append(" -D ").append(genomeDir.getAbsolutePath());
    command.append(" -d genome ").append(genomePathname);

    return command.toString();
  }

  • Some mappers need to know the quality protocol of the FASTQ format of input data. In these cases, we add a method like this:

  /**
   * Returns the command line argument that selects the quality protocol
   * matching a FASTQ format.
   *
   * @param format FASTQ format of the input data
   * @return the "--quality-protocol=..." argument to add to the command line
   * @throws IOException if the format is FASTQ_SOLEXA
   */
  private static final String getMyMapperQualityArgument(final FastqFormat format) throws IOException {

    switch (format) {

    // the Solexa format is rejected
    case FASTQ_SOLEXA:
      throw new IOException("MyMapper not handle the Solexa FASTQ format.");

    // both Illumina variants share the same argument
    case FASTQ_ILLUMINA:
    case FASTQ_ILLUMINA_1_5:
      return "--quality-protocol=illumina";

    // FASTQ_SANGER and any other format
    default:
      return "--quality-protocol=sanger";
    }
  }

  • Finally, add the internalMap() methods for both single-end and paired-end modes:

  /**
   * Runs the mapper in single-end mode and stores the resulting SAM file in
   * the outputFile field.
   *
   * @param readsFile file that contains the reads to map
   * @param archiveIndexDir directory passed to the mapper with the "-D" option
   * @throws IOException if the exit value of the command is not 0
   */
  @Override
  protected void internalMap(File readsFile, File archiveIndexDir) throws IOException {

    // install the executable if necessary, guarded by the SYNC monitor
    final String executablePath;
    synchronized (SYNC) {
      executablePath = install(MAPPER_EXECUTABLE);
    }

    // the SAM output goes to a temporary file created in the directory of the reads file
    final File readsDir = readsFile.getParentFile();
    final String tmpPrefix = getMapperName().toLowerCase() + "-outputFile-";
    final File samFile = FileUtils.createTempFile(readsDir, tmpPrefix, ".sam");

    // Build the command line. The arguments shown here are those of GSNAP, as an
    // example of the arguments used by the majority of mappers.
    // getThreadsNumber() gives the value of the "-t" option.
    final StringBuilder sb = new StringBuilder();
    sb.append(executablePath);
    sb.append(" -A sam ").append(getMyMapperQualityArgument(getFastqFormat()));
    sb.append(" -t ").append(getThreadsNumber());
    sb.append(" -D ").append(archiveIndexDir.getAbsolutePath());
    sb.append(" -d genome ").append(getMapperArguments());
    sb.append(" ").append(readsFile.getAbsolutePath());
    // standard output is redirected to the SAM file, standard error is discarded
    sb.append(" > ").append(samFile.getAbsolutePath());
    sb.append(" 2> /dev/null");
    final String commandLine = sb.toString();

    LOGGER.info(commandLine);

    // run the shell command line and fail on a non-zero exit value
    final int status = sh(commandLine);
    if (status != 0) {
      throw new IOException("Bad error result for " + MAPPER_NAME + " execution: " + status);
    }

    this.outputFile = samFile;
  }
  
  /**
   * Runs the mapper in paired-end mode and stores the resulting SAM file in
   * the outputFile field.
   *
   * @param readsFile1 first reads file given to the mapper
   * @param readsFile2 second reads file given to the mapper
   * @param archiveIndexDir directory passed to the mapper with the "-D" option
   * @throws IOException if the exit value of the command is not 0
   */
  @Override
  protected void internalMap(File readsFile1, File readsFile2,
      File archiveIndexDir) throws IOException {

    // install the executable if necessary, guarded by the SYNC monitor
    final String executablePath;
    synchronized (SYNC) {
      executablePath = install(MAPPER_EXECUTABLE);
    }

    // the SAM output goes to a temporary file created in the directory of the first reads file
    final File readsDir = readsFile1.getParentFile();
    final String tmpPrefix = getMapperName().toLowerCase() + "-outputFile-";
    final File samFile = FileUtils.createTempFile(readsDir, tmpPrefix, ".sam");

    // Build the command line: same options as in single-end mode, followed by
    // the two reads files
    final StringBuilder sb = new StringBuilder();
    sb.append(executablePath);
    sb.append(" -A sam ").append(getMyMapperQualityArgument(getFastqFormat()));
    sb.append(" -t ").append(getThreadsNumber());
    sb.append(" -D ").append(archiveIndexDir.getAbsolutePath());
    sb.append(" -d genome ").append(getMapperArguments());
    sb.append(" ").append(readsFile1.getAbsolutePath());
    sb.append(" ").append(readsFile2.getAbsolutePath());
    // standard output is redirected to the SAM file, standard error is discarded
    sb.append(" > ").append(samFile.getAbsolutePath());
    sb.append(" 2> /dev/null");
    final String commandLine = sb.toString();

    LOGGER.info(commandLine);

    // run the shell command line and fail on a non-zero exit value
    final int status = sh(commandLine);
    if (status != 0) {
      throw new IOException("Bad error result for " + MAPPER_NAME + " execution: " + status);
    }

    this.outputFile = samFile;
  }

  • Now our SequenceReadsMapper can compile and can be used in a standalone program but not as a mapreads plug-in. To enable our SequenceReadsMapper as a plug-in we must register it by adding the full name of the class in the fr.ens.transcriptome.eoulsan.bio.readsmapper.SequenceReadsMapper text file in the META-INF/services directory. See the Writing Step Plugin for more information:
com.example.MyMapperReadsMapper

The wrapper on the index generator

Here, we suppose that MyMapper has a specific index generator named MyMapper-build rather than the GMAP index generator. The mymapper-build executable of this index generator must be bundled in the src/main/java/files/linux/amd64 source folder.

  • First, some modifications must be made to the three methods getArchiveFormat(), getIndexerCommand() and getIndexerExecutable() already implemented above:

  // name of the index generator executable (returned by getIndexerExecutable())
  private static final String INDEXER_EXECUTABLE = "mymapper-build";

  /**
   * Returns the data format of the index archive.
   *
   * @return DataFormats.MYMAPPER_INDEX_ZIP
   */
  @Override
  public DataFormat getArchiveFormat() {
    return DataFormats.MYMAPPER_INDEX_ZIP;
  }

  /**
   * Returns the name of the index generator executable.
   *
   * @return the value of the INDEXER_EXECUTABLE constant
   */
  @Override
  protected String getIndexerExecutable() {
    return INDEXER_EXECUTABLE;
  }

  /**
   * Builds the command line used to compute the index.
   *
   * @param indexerPathname path of the index generator executable
   * @param genomePathname path of the genome file
   * @return the command line as a single string
   */
  @Override
  protected String getIndexerCommand(String indexerPathname, String genomePathname) {

    // the "-B" and "-D" options receive the directories that contain the
    // indexer executable and the genome file
    final String indexerDirPath = new File(indexerPathname).getParentFile().getAbsolutePath();
    final String genomeDirPath = new File(genomePathname).getParentFile().getAbsolutePath();

    return String.format("%s -B %s -D %s -d genome %s",
        indexerPathname, indexerDirPath, genomeDirPath, genomePathname);
  }


// The DataType object: describes the type of data held by a MyMapper index.
// It is referenced as DataTypes.MYMAPPER_INDEX by the DataFormat object.
public static final DataType MYMAPPER_INDEX = new AbstractDataType() {

  // name of the data type
  @Override
  public String getName() {

    return "mymapper_index";
  }

  // prefix associated with this data type
  @Override
  public String getPrefix() {

    return "mymapper_index_";
  }

  // this example declares one file per analysis
  @Override
  public boolean isOneFilePerAnalysis() {

    return true;
  }
};

// The DataFormat object
/**
 * Data format of the MyMapper index archive: a ZIP file of type
 * DataTypes.MYMAPPER_INDEX that can be produced by a generator step.
 */
public final class MyMapperIndexZipDataFormat extends AbstractDataFormat {

  /** Name of the format, used to look it up in the registry. */
  public static final String FORMAT_NAME = "mymapper_index_zip";

  /**
   * Returns the data type of this format.
   *
   * @return DataTypes.MYMAPPER_INDEX
   */
  @Override
  public DataType getType() {

    return DataTypes.MYMAPPER_INDEX;
  }

  /**
   * Returns the default file extension of this format.
   *
   * @return ".zip"
   */
  @Override
  public String getDefaultExtention() {

    return ".zip";
  }

  /**
   * Returns the name of this format.
   *
   * @return the value of the FORMAT_NAME constant
   */
  @Override
  public String getFormatName() {

    return FORMAT_NAME;
  }

  /**
   * Returns the content type of this format.
   *
   * @return "application/zip"
   */
  @Override
  public String getContentType() {

    return "application/zip";
  }

  /**
   * Tells whether a generator step exists for this format.
   *
   * @return always true, see getGenerator()
   */
  @Override
  public boolean isGenerator() {

    return true;
  }

  /**
   * Returns the step that generates data in this format.
   *
   * @return a new GenomeMapperIndexGeneratorStep created for "mymapper"
   */
  @Override
  public Step getGenerator() {

    // NOTE(review): "mymapper" is presumably the name used to look up the
    // mapper; confirm how it is matched against getMapperName() ("MyMapper")
    return new GenomeMapperIndexGeneratorStep("mymapper");
  }

}

// creation of a constant with the instance of the DataFormat, looked up by its format name
// NOTE(review): "resgistry" must match the name of the registry field declared in the enclosing class; confirm
DataFormat MYMAPPER_INDEX_ZIP = resgistry.getDataFormatFromName(MyMapperIndexZipDataFormat.FORMAT_NAME);

// in the fr.ens.transcriptome.eoulsan.data.DataFormat text file in the META-INF/services directory
fr.ens.transcriptome.eoulsan.data.MyMapperIndexZipDataFormat