Skip to content

MethylStore

quantnado.dataset.store_methyl.MethylStore

MethylStore(
    store_path: Path | str,
    sample: str,
    chromsizes: dict[str, int],
    *,
    chunk_len: int,
    compressors: list,
    overwrite: bool = True,
)

Per-sample Zarr store for TAPS/WGBS methylation data.

Stores dense (1, chrom_len) arrays for coverage, methylation_pct, n_methylated, and n_total per chromosome. BAM provides coverage; bedGraph provides methylation values.

Use :meth:from_files to create or :meth:open to read.

Source code in quantnado/dataset/store_methyl.py
def __init__(
    self,
    store_path: Path | str,
    sample: str,
    chromsizes: dict[str, int],
    *,
    chunk_len: int,
    compressors: list,
    overwrite: bool = True,
) -> None:
    self.store_path = _normalize_path(store_path)
    self.sample = sample
    self.chromsizes = chromsizes
    self.chromosomes = sorted(chromsizes.keys())
    self.chunk_len = chunk_len
    self.compressors = compressors

    if overwrite:
        _delete_path(self.store_path)

    self.store_path.parent.mkdir(parents=True, exist_ok=True)
    local_store = LocalStore(str(self.store_path))
    self.root = zarr.group(store=local_store, overwrite=True, zarr_format=3)
    self.meta = create_metadata_group(self.root, sample)
    self._init_arrays()
    self.root.attrs.update({
        "assay": "meth",
        "sample": sample,
        "chromsizes": chromsizes,
        "chunk_len": chunk_len,
    })

from_files classmethod

from_files(
    bam_path: str | Path,
    methyl_path: str | Path,
    store_path: Path | str,
    sample: str,
    chromsizes: str | Path | dict[str, int] | None = None,
    *,
    bam_filter: ReadFilter | None = None,
    count_fragments: bool = False,
    chunk_len: int | None = None,
    construction_compression: str = DEFAULT_CONSTRUCTION_COMPRESSION,
    overwrite: bool = True,
    filter_chromosomes: bool = True,
    test: bool = False,
    test_chromosomes: list[str]
    | tuple[str, ...]
    | None = None,
    log_file: Path | None = None,
) -> "MethylStore"

Create a per-sample MethylStore from a BAM file and a MethylDackel bedGraph.

Parameters:

Name Type Description Default
bam_path str | Path

Aligned BAM file (for coverage).

required
methyl_path str | Path

MethylDackel CpG bedGraph (for methylation values).

required
store_path Path | str

Output .zarr directory.

required
sample str

Sample name.

required
chromsizes str | Path | dict[str, int] | None

Path to .chrom.sizes, dict, or None to infer from BAM header.

None
Source code in quantnado/dataset/store_methyl.py
@classmethod
def from_files(
    cls,
    bam_path: str | Path,
    methyl_path: str | Path,
    store_path: Path | str,
    sample: str,
    chromsizes: str | Path | dict[str, int] | None = None,
    *,
    bam_filter: bamnado.ReadFilter | None = None,
    count_fragments: bool = False,
    chunk_len: int | None = None,
    construction_compression: str = DEFAULT_CONSTRUCTION_COMPRESSION,
    overwrite: bool = True,
    filter_chromosomes: bool = True,
    test: bool = False,
    test_chromosomes: list[str] | tuple[str, ...] | None = None,
    log_file: Path | None = None,
) -> "MethylStore":
    """Create a per-sample MethylStore from a BAM file and a MethylDackel bedGraph.

    Parameters
    ----------
    bam_path:
        Aligned BAM file (for coverage).
    methyl_path:
        MethylDackel CpG bedGraph (for methylation values).
    store_path:
        Output .zarr directory.
    sample:
        Sample name.
    chromsizes:
        Path to .chrom.sizes, dict, or None to infer from BAM header.
    """
    if log_file is not None:
        from quantnado.utils import setup_logging
        setup_logging(Path(log_file), verbose=False)

    bam_path = str(bam_path)

    if chromsizes is None:
        chromsizes = _get_chromsizes_from_bam(bam_path)

    chromsizes_dict = _parse_chromsizes(
        chromsizes,
        filter_chromosomes=filter_chromosomes,
        test=test,
        test_chromosomes=test_chromosomes,
    )

    logger.info(f"Reading methylation data from {methyl_path}")
    by_chrom = _read_bedgraph(
        methyl_path,
        filter_chromosomes=filter_chromosomes,
        test=test,
        test_chromosomes=test_chromosomes,
    )

    resolved_chunk_len = _resolve_chunk_len(chromsizes_dict, Path(store_path), chunk_len)
    compressors = _resolve_compressors(construction_compression)
    read_filter = bam_filter or bamnado.ReadFilter()

    store = cls(
        store_path=store_path,
        sample=sample,
        chromsizes=chromsizes_dict,
        chunk_len=resolved_chunk_len,
        compressors=compressors,
        overwrite=overwrite,
    )

    logger.info(f"Writing BAM coverage for {sample}")
    sparsity = store._write_coverage(bam_path, read_filter, count_fragments)
    logger.info(f"Writing methylation data for {sample}")
    store._write_methylation(by_chrom)
    store._finalise(bam_path, sparsity)

    return store