Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Refactor Dataset save methods for consistency #2199

Open
heuermh opened this issue Aug 26, 2019 · 2 comments
Open

Refactor Dataset save methods for consistency #2199

heuermh opened this issue Aug 26, 2019 · 2 comments

Comments

@heuermh
Copy link
Member

heuermh commented Aug 26, 2019

Abstract classes

// Save-method surface of the GenomicDataset trait as listed in this issue.
// Method bodies are empty in this listing.
trait GenomicDataset[T, U <: Product, V <: GenomicDataset[T, U, V]] extends Logging {
  // Overload that takes a SaveArgs bundle.
  def saveAsParquet(args: SaveArgs): Unit = {
  }
  // Overload with explicit settings. It is declared without a body, so the
  // signature ends at `: Unit` and no closing brace follows it.
  def saveAsParquet(
    pathName: String,
    blockSize: Int = 128 * 1024 * 1024,
    pageSize: Int = 1 * 1024 * 1024,
    compressCodec: CompressionCodecName = CompressionCodecName.GZIP,
    disableDictionaryEncoding: Boolean = false): Unit
  // Written in procedure syntax (no `: Unit =`), unlike the two overloads above.
  def saveAsPartitionedParquet(pathName: String,
                               compressCodec: CompressionCodecName = CompressionCodecName.GZIP,
                               partitionSize: Int = 1000000) {
  }
}

// Save-related members of AvroGenomicDataset as listed in this issue.
// Every method body is empty in this listing.
abstract class AvroGenomicDataset[T <% IndexedRecord: Manifest, U <: Product, V <: AvroGenomicDataset[T, U, V]] extends GenomicDataset[T, U, V] {

  // Protected overload that takes a SaveArgs bundle.
  protected def saveRddAsParquet(args: SaveArgs): Unit = {
  }
  // Protected overload with explicit settings. It is the only save method in this
  // class that accepts an optional Schema, and its body is wrapped in SaveAsADAM.time.
  protected def saveRddAsParquet(
    pathName: String,
    blockSize: Int = 128 * 1024 * 1024,
    pageSize: Int = 1 * 1024 * 1024,
    compressCodec: CompressionCodecName = CompressionCodecName.GZIP,
    disableDictionaryEncoding: Boolean = false,
    optSchema: Option[Schema] = None): Unit = SaveAsADAM.time {
  }
  // Public overload with Scala types and default values; same parameter list as the
  // body-less saveAsParquet declaration in the GenomicDataset trait.
  def saveAsParquet(
    pathName: String,
    blockSize: Int = 128 * 1024 * 1024,
    pageSize: Int = 1 * 1024 * 1024,
    compressCodec: CompressionCodecName = CompressionCodecName.GZIP,
    disableDictionaryEncoding: Boolean = false) {
  }
  // Overload using java.lang parameter types and no defaults
  // (presumably intended for Java callers; confirm against the real source).
  def saveAsParquet(
    pathName: java.lang.String,
    blockSize: java.lang.Integer,
    pageSize: java.lang.Integer,
    compressCodec: CompressionCodecName,
    disableDictionaryEncoding: java.lang.Boolean) {
  }
  // Single-argument overload that takes only the path.
  def saveAsParquet(pathName: java.lang.String) {
  }
}

Concrete classes

// Save methods listed for CoverageDataset; bodies are empty in this listing.
abstract class CoverageDataset {
  // NOTE(review): the first parameter is named filePath here, while the
  // saveAsParquet declarations in GenomicDataset and AvroGenomicDataset name it pathName.
  def saveAsParquet(filePath: String,
                    blockSize: Int = 128 * 1024 * 1024,
                    pageSize: Int = 1 * 1024 * 1024,
                    compressCodec: CompressionCodecName = CompressionCodecName.GZIP,
                    disableDictionaryEncoding: Boolean = false) {
  }
  // Overload using java.lang parameter types, no defaults, and no declared return type.
  def save(filePath: java.lang.String,
           asSingleFile: java.lang.Boolean,
           disableFastConcat: java.lang.Boolean) = {
  }
}

// Parquet save override listed for DatasetBoundFeatureDataset; the body is empty in this listing.
// NOTE(review): the first parameter is named filePath here, while the saveAsParquet
// declarations in GenomicDataset and AvroGenomicDataset name it pathName.
case class DatasetBoundFeatureDataset {
  override def saveAsParquet(filePath: String,
                             blockSize: Int = 128 * 1024 * 1024,
                             pageSize: Int = 1 * 1024 * 1024,
                             compressCodec: CompressionCodecName = CompressionCodecName.GZIP,
                             disableDictionaryEncoding: Boolean = false) {
  }
}

// Save methods listed for FeatureDataset; bodies are empty in this listing.
// NOTE(review): save names its path parameter filePath, while every saveAs* method
// below names it fileName.
sealed abstract class FeatureDataset {
  // Overload using java.lang parameter types and no defaults; written in procedure syntax.
  def save(filePath: java.lang.String,
           asSingleFile: java.lang.Boolean,
           disableFastConcat: java.lang.Boolean) {
  }
  def saveAsGtf(fileName: String,
                asSingleFile: Boolean = false,
                disableFastConcat: Boolean = false) = {
  }
  def saveAsGff3(fileName: String,
                 asSingleFile: Boolean = false,
                 disableFastConcat: Boolean = false) = {
  }
  // minimumScore and maximumScore have no defaults but follow defaulted parameters,
  // so a caller must use named arguments to rely on the earlier defaults.
  def saveAsUcscBed(fileName: String,
                    asSingleFile: Boolean = false,
                    disableFastConcat: Boolean = false,
                    minimumScore: Double,
                    maximumScore: Double,
                    missingValue: Int = 0) = {
  }
  def saveAsBed(fileName: String,
                asSingleFile: Boolean = false,
                disableFastConcat: Boolean = false) = {
  }
  def saveAsIntervalList(fileName: String,
                         asSingleFile: Boolean = false,
                         disableFastConcat: Boolean = false) = {
  }
  // Written in procedure syntax, unlike the other saveAs* methods in this class.
  def saveAsNarrowPeak(fileName: String,
                       asSingleFile: Boolean = false,
                       disableFastConcat: Boolean = false) {
  }
}

// Parquet save override listed for DatasetBoundFragmentDataset; the body is empty in this listing.
// NOTE(review): the first parameter is named filePath here, while the saveAsParquet
// declarations in GenomicDataset and AvroGenomicDataset name it pathName.
case class DatasetBoundFragmentDataset {
  override def saveAsParquet(filePath: String,
                             blockSize: Int = 128 * 1024 * 1024,
                             pageSize: Int = 1 * 1024 * 1024,
                             compressCodec: CompressionCodecName = CompressionCodecName.GZIP,
                             disableDictionaryEncoding: Boolean = false) {
  }
}

// Save methods listed for FragmentDataset; the body is empty in this listing.
sealed abstract class FragmentDataset {
  // The only save method listed for this class; it takes just the path.
  def save(filePath: java.lang.String) {
  }
}

// Parquet save override listed for DatasetBoundAlignmentDataset; the body is empty in this listing.
// NOTE(review): the first parameter is named filePath here, while the saveAsParquet
// declarations in GenomicDataset and AvroGenomicDataset name it pathName.
case class DatasetBoundAlignmentDataset {
  override def saveAsParquet(filePath: String,
                             blockSize: Int = 128 * 1024 * 1024,
                             pageSize: Int = 1 * 1024 * 1024,
                             compressCodec: CompressionCodecName = CompressionCodecName.GZIP,
                             disableDictionaryEncoding: Boolean = false) {
  }
}

// Save methods listed for AlignmentDataset; bodies are empty in this listing.
sealed abstract class AlignmentDataset {
  // Takes an ADAMSaveAnyArgs bundle; declared to return Boolean.
  def save(args: ADAMSaveAnyArgs,
           isSorted: Boolean = false): Boolean = {
  }
  // Overload using java.lang types; declared to return java.lang.Boolean.
  def save(filePath: java.lang.String,
           isSorted: java.lang.Boolean): java.lang.Boolean = {
  }
  // Takes no path argument and is declared to return a String.
  def saveAsSamString(): String = {
  }
  // Overload with defaults; sortedness is a Boolean flag (isSorted).
  def saveAsSam(
    filePath: String,
    asType: Option[SAMFormat] = None,
    asSingleFile: Boolean = false,
    isSorted: Boolean = false,
    deferMerging: Boolean = false,
    disableFastConcat: Boolean = false): Unit = SAMSave.time {
  }
  // Overload without defaults; takes a SAMFileHeader.SortOrder in place of isSorted.
  def saveAsSam(
    filePath: String,
    asType: Option[SAMFormat],
    asSingleFile: Boolean,
    sortOrder: SAMFileHeader.SortOrder,
    deferMerging: Boolean,
    disableFastConcat: Boolean): Unit = SAMSave.time {
  }
  // Overload using java.lang types; asType is a bare SAMFormat rather than an Option,
  // and deferMerging / disableFastConcat are not part of this signature.
  def saveAsSam(
    filePath: java.lang.String,
    asType: SAMFormat,
    asSingleFile: java.lang.Boolean,
    isSorted: java.lang.Boolean) {
  }
  // Overload using java.lang Booleans and no defaults; persistLevel is a bare StorageLevel.
  def saveAsPairedFastq(
    fileName1: String,
    fileName2: String,
    writeOriginalQualityScores: java.lang.Boolean,
    asSingleFile: java.lang.Boolean,
    disableFastConcat: java.lang.Boolean,
    validationStringency: ValidationStringency,
    persistLevel: StorageLevel) {
  }
  // Overload with defaults; persistLevel is an Option[StorageLevel] here.
  def saveAsPairedFastq(
    fileName1: String,
    fileName2: String,
    writeOriginalQualityScores: Boolean = false,
    asSingleFile: Boolean = false,
    disableFastConcat: Boolean = false,
    validationStringency: ValidationStringency = ValidationStringency.LENIENT,
    persistLevel: Option[StorageLevel] = None) {
  }
  // Overload using java.lang Booleans and no defaults; has no fileName2Opt or
  // persistLevel parameter.
  def saveAsFastq(
    fileName: String,
    writeOriginalQualityScores: java.lang.Boolean,
    sort: java.lang.Boolean,
    asSingleFile: java.lang.Boolean,
    disableFastConcat: java.lang.Boolean,
    validationStringency: ValidationStringency) {
  }
  // Overload with defaults; adds an optional second file name and an optional StorageLevel.
  def saveAsFastq(
    fileName: String,
    fileName2Opt: Option[String] = None,
    writeOriginalQualityScores: Boolean = false,
    sort: Boolean = false,
    asSingleFile: Boolean = false,
    disableFastConcat: Boolean = false,
    validationStringency: ValidationStringency = ValidationStringency.LENIENT,
    persistLevel: Option[StorageLevel] = None) {
  }
}

// Parquet save override listed for DatasetBoundReadDataset; the body is empty in this listing.
// NOTE(review): the first parameter is named filePath here, while the saveAsParquet
// declarations in GenomicDataset and AvroGenomicDataset name it pathName.
case class DatasetBoundReadDataset {
  override def saveAsParquet(filePath: String,
                             blockSize: Int = 128 * 1024 * 1024,
                             pageSize: Int = 1 * 1024 * 1024,
                             compressCodec: CompressionCodecName = CompressionCodecName.GZIP,
                             disableDictionaryEncoding: Boolean = false) {
  }
}

// Save methods listed for ReadDataset; bodies are empty in this listing.
sealed abstract class ReadDataset {
  // Overload using java.lang types; unlike saveAsFastq below it has no
  // disableFastConcat parameter.
  def save(filePath: java.lang.String, asSingleFile: java.lang.Boolean) {
  }
  def saveAsFastq(filePath: String,
                  asSingleFile: Boolean = false,
                  disableFastConcat: Boolean = false) {
  }
}

// Parquet save override listed for DatasetBoundSequenceDataset; the body is empty in this listing.
// NOTE(review): the first parameter is named filePath here, while the saveAsParquet
// declarations in GenomicDataset and AvroGenomicDataset name it pathName.
case class DatasetBoundSequenceDataset  {
  override def saveAsParquet(filePath: String,
                             blockSize: Int = 128 * 1024 * 1024,
                             pageSize: Int = 1 * 1024 * 1024,
                             compressCodec: CompressionCodecName = CompressionCodecName.GZIP,
                             disableDictionaryEncoding: Boolean = false) {
  }
}

// Save methods listed for SequenceDataset; bodies are empty in this listing.
// The two signatures match those listed for SliceDataset.
sealed abstract class SequenceDataset {
  // Overload using java.lang types and no defaults; it has no lineWidth parameter.
  def save(
    filePath: java.lang.String,
    asSingleFile: java.lang.Boolean,
    disableFastConcat: java.lang.Boolean) {
  }
  // Overload with Scala types and defaults, including lineWidth = 60.
  def saveAsFasta(filePath: String,
                  asSingleFile: Boolean = false,
                  disableFastConcat: Boolean = false,
                  lineWidth: Int = 60) {
  }
}

// Parquet save override listed for DatasetBoundSliceDataset; the body is empty in this listing.
// NOTE(review): the first parameter is named filePath here, while the saveAsParquet
// declarations in GenomicDataset and AvroGenomicDataset name it pathName.
case class DatasetBoundSliceDataset {
  override def saveAsParquet(filePath: String,
                             blockSize: Int = 128 * 1024 * 1024,
                             pageSize: Int = 1 * 1024 * 1024,
                             compressCodec: CompressionCodecName = CompressionCodecName.GZIP,
                             disableDictionaryEncoding: Boolean = false) {
  }
}

// Save methods listed for SliceDataset; bodies are empty in this listing.
// The two signatures match those listed for SequenceDataset.
sealed abstract class SliceDataset {
  // Overload using java.lang types and no defaults; it has no lineWidth parameter.
  def save(
    filePath: java.lang.String,
    asSingleFile: java.lang.Boolean,
    disableFastConcat: java.lang.Boolean) {
  }
  // Overload with Scala types and defaults, including lineWidth = 60.
  def saveAsFasta(filePath: String,
                  asSingleFile: Boolean = false,
                  disableFastConcat: Boolean = false,
                  lineWidth: Int = 60) {
  }
}

// Parquet save override listed for DatasetBoundGenotypeDataset; the body is empty in this listing.
// NOTE(review): the first parameter is named filePath here, while the saveAsParquet
// declarations in GenomicDataset and AvroGenomicDataset name it pathName.
case class DatasetBoundGenotypeDataset {
  override def saveAsParquet(filePath: String,
                             blockSize: Int = 128 * 1024 * 1024,
                             pageSize: Int = 1 * 1024 * 1024,
                             compressCodec: CompressionCodecName = CompressionCodecName.GZIP,
                             disableDictionaryEncoding: Boolean = false) {
  }
}

// Save methods listed for GenotypeDataset; the body is empty in this listing.
sealed abstract class GenotypeDataset {
  // Same signature as saveVcfHeaders in VariantContextDataset and VariantDataset.
  def saveVcfHeaders(filePath: String): Unit = {
  }
}

// Save methods listed for VariantContextDataset; bodies are empty in this listing.
sealed abstract class VariantContextDataset {
  def saveVcfHeaders(filePath: String): Unit = {
  }
  // Declared without `override`, and its first parameter is named pathName, whereas
  // the DatasetBound* case classes in this listing use filePath.
  def saveAsParquet(pathName: String,
                    blockSize: Int = 128 * 1024 * 1024,
                    pageSize: Int = 1 * 1024 * 1024,
                    compressCodec: CompressionCodecName = CompressionCodecName.GZIP,
                    disableDictionaryEncoding: Boolean = false) {
  }
  // Overload taking an ADAMSaveAnyArgs bundle; stringency defaults to LENIENT.
  def saveAsVcf(args: ADAMSaveAnyArgs,
                stringency: ValidationStringency = ValidationStringency.LENIENT): Unit = {
  }
  // Path-only overload.
  def saveAsVcf(filePath: String): Unit = {
  }
  // Fully explicit overload with no defaults; its body is wrapped in SaveAsVcf.time.
  def saveAsVcf(filePath: String,
                asSingleFile: Boolean,
                deferMerging: Boolean,
                disableFastConcat: Boolean,
                stringency: ValidationStringency): Unit = SaveAsVcf.time {
  }
}

// Parquet save override listed for DatasetBoundVariantDataset; the body is empty in this listing.
// NOTE(review): the first parameter is named filePath here, while the saveAsParquet
// declarations in GenomicDataset and AvroGenomicDataset name it pathName.
case class DatasetBoundVariantDataset {
  override def saveAsParquet(filePath: String,
                             blockSize: Int = 128 * 1024 * 1024,
                             pageSize: Int = 1 * 1024 * 1024,
                             compressCodec: CompressionCodecName = CompressionCodecName.GZIP,
                             disableDictionaryEncoding: Boolean = false) {
  }
}

// Save methods listed for VariantDataset; the body is empty in this listing.
sealed abstract class VariantDataset {
  // Same signature as saveVcfHeaders in GenotypeDataset and VariantContextDataset.
  def saveVcfHeaders(filePath: String): Unit = {
  }
}
@heuermh
Copy link
Member Author

heuermh commented Aug 28, 2019

See also #1295

@heuermh
Copy link
Member Author

heuermh commented Oct 1, 2019

See also #2198

@heuermh heuermh added this to the 0.34.0 milestone Jan 28, 2021
@heuermh heuermh modified the milestones: 0.34.0, 0.35.0 Mar 10, 2021
@heuermh heuermh modified the milestones: 0.35.0, 0.36.0 Apr 26, 2021
@heuermh heuermh modified the milestones: 0.36.0, 0.37.0 Jul 23, 2021
@heuermh heuermh modified the milestones: 0.37.0, 0.38.0 Jan 12, 2022
@heuermh heuermh removed this from the 0.38.0 milestone May 9, 2022
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
None yet
Projects
None yet
Development

No branches or pull requests

1 participant