Skip to content

QuantNado

quantnado.api.QuantNado

QuantNado(store: MultiomicsStore | BamStore)

Unified facade for QuantNado genomic analysis.

Wraps a MultiomicsStore (or a bare BamStore) and provides properties to access each modality plus convenience methods for coverage-based analysis (reduce, extract, count_features, pca, metaplot, tornadoplot, heatmap, correlate, normalise).

Construction

qn = QuantNado.open("dataset/") # MultiomicsStore directory qn = QuantNado.open("coverage.zarr") # BamStore only qn = QuantNado.from_bam_files( # BAM-only dataset ... bam_files=["s1.bam", "s2.bam"], ... store_path="coverage.zarr", ... ) qn = QuantNado.create_dataset( # multi-omics dataset ... store_dir="dataset/", ... bam_files=["s1.bam"], ... methylation_files=["s1.bedGraph"], ... )

Modality access

qn.coverage # BamStore | None qn.methylation # MethylStore | None qn.variants # VariantStore | None qn.modalities # ['coverage', 'methylation', ...]

Source code in quantnado/api.py
def __init__(self, store: MultiomicsStore | BamStore) -> None:
    if isinstance(store, MultiomicsStore):
        self._multiomics: MultiomicsStore | None = store
        self._bam: BamStore | None = store.coverage
    elif isinstance(store, BamStore):
        self._multiomics = None
        self._bam = store
    else:
        raise TypeError(
            f"store must be a MultiomicsStore or BamStore, got {type(store).__name__}"
        )

coverage property

coverage: BamStore | None

The coverage (BAM) sub-store, or None if not present.

methylation property

methylation: MethylStore | None

The methylation sub-store, or None if not present.

variants property

variants: VariantStore | None

The variant sub-store, or None if not present.

modalities property

modalities: list[str]

List of available modalities.

store property

store: BamStore

The underlying coverage BamStore (raises if not present).

store_path property

store_path: Path

Path to the coverage Zarr store.

samples property

samples: list[str]

Sample names in the coverage store.

chromosomes property

chromosomes: list[str]

Chromosome names in the coverage store.

chromsizes property

chromsizes: dict[str, int]

Mapping of chromosome names to sizes.

metadata property

metadata: DataFrame

Metadata DataFrame. Combined across all modalities if multiomics.

n_completed property

n_completed: int

Number of fully processed samples in the coverage store.

open_dataset classmethod

open_dataset(
    path: str | Path, read_only: bool = True
) -> "QuantNado"

Open an existing QuantNado dataset.

Auto-detects whether path is a MultiomicsStore directory or a single BamStore (.zarr).

Parameters:

Name Type Description Default
path str or Path

Path to the store directory or .zarr file.

required
read_only bool

If True, disables write operations (BamStore only).

True

Returns:

Type Description
QuantNado
Source code in quantnado/api.py
@classmethod
def open_dataset(cls, path: str | Path, read_only: bool = True) -> "QuantNado":
    """
    Open an existing QuantNado dataset.

    Auto-detects whether ``path`` is a ``MultiomicsStore`` directory or a
    single ``BamStore`` (``.zarr``).

    Parameters
    ----------
    path : str or Path
        Path to the store directory or ``.zarr`` file.
    read_only : bool, default True
        If True, disables write operations (BamStore only).

    Returns
    -------
    QuantNado
    """
    path = Path(path)
    if str(path).endswith(".zarr"):
        return cls(BamStore.open(path, read_only=read_only))
    zarr_path = path.with_suffix(".zarr")
    if zarr_path.exists():
        return cls(BamStore.open(zarr_path, read_only=read_only))
    return cls(MultiomicsStore.open(path))

from_bam_files classmethod

from_bam_files(
    bam_files: list[str],
    store_path: "str | Path",
    chromsizes: "str | Path | dict[str, int] | None" = None,
    **kwargs,
) -> "QuantNado"

Create a QuantNado dataset from BAM files.

Parameters:

Name Type Description Default
bam_files list of str

BAM file paths.

required
store_path str or Path

Output Zarr store path.

required
chromsizes str, Path, dict, or None

Chromosome sizes. Extracted from first BAM if not provided.

None
**kwargs

Passed through to BamStore.from_bam_files.

{}

Returns:

Type Description
QuantNado
Source code in quantnado/api.py
@classmethod
def from_bam_files(
    cls,
    bam_files: list[str],
    store_path: "str | Path",
    chromsizes: "str | Path | dict[str, int] | None" = None,
    **kwargs,
) -> "QuantNado":
    """
    Create a QuantNado dataset from BAM files.

    Parameters
    ----------
    bam_files : list of str
        BAM file paths.
    store_path : str or Path
        Output Zarr store path.
    chromsizes : str, Path, dict, or None
        Chromosome sizes. Extracted from first BAM if not provided.
    **kwargs
        Passed through to ``BamStore.from_bam_files``.

    Returns
    -------
    QuantNado
    """
    store = BamStore.from_bam_files(
        bam_files=bam_files,
        store_path=store_path,
        chromsizes=chromsizes,
        **kwargs,
    )
    return cls(store)

create_dataset classmethod

create_dataset(
    store_dir: str | Path,
    bam_files: list[str | Path] | None = None,
    methylation_files: list[str | Path] | None = None,
    vcf_files: list[str | Path] | None = None,
    chromsizes: str | Path | dict[str, int] | None = None,
    metadata: DataFrame | Path | str | None = None,
    *,
    sample_column: str = "sample_id",
    bam_sample_names: list[str] | callable | None = None,
    methylation_sample_names: list[str] | None = None,
    vcf_sample_names: list[str] | callable | None = None,
    filter_chromosomes: bool = True,
    stranded: list[str] | dict[str, str] | None = None,
    overwrite: bool = True,
    resume: bool = False,
    max_workers: int = 1,
    chunk_len: int = DEFAULT_CHUNK_LEN,
    construction_compression: str = "default",
    local_staging: bool = False,
    staging_dir: str | Path | None = None,
    test: bool = False,
    log_file: Path | None = None,
) -> "QuantNado"

Create a new QuantNado dataset from genomic files.

At least one of bam_files, methylation_files, or vcf_files must be provided.

Parameters:

Name Type Description Default
store_dir str or Path

Output directory. Created if it does not exist.

required
bam_files list of Path

BAM files for per-base coverage storage.

None
methylation_files list of Path

Methylation files of any supported type. File type is detected from the filename: *num_mc_cxreport* → mC, *num_hmc_cxreport* → hmC, *CXreport* → evoC whole-molecule, everything else → MethylDackel bedGraph.

None
vcf_files list of Path

VCF.gz files (one per sample) for variant storage.

None
chromsizes str, Path, or dict

Chromosome sizes. Extracted from the first BAM if not provided.

None
metadata DataFrame, Path, or str

Sample metadata CSV attached to all sub-stores.

None
sample_column str

Column in metadata matching sample names.

"sample_id"
bam_sample_names list of str or callable

Override sample names for BAM files. A callable receives each Path and returns a str (e.g. lambda p: p.stem).

None
methylation_sample_names list of str

Override sample names for methylation files, in classification order: bedGraph names first, then CXreport, then mC/hmC (one name per sample pair).

None
vcf_sample_names list of str or callable

Override sample names for VCF files. A callable receives each Path and returns a str.

None
filter_chromosomes bool

Keep only canonical chromosomes (chr* without underscores).

True
stranded list of str or dict

Strand-specific coverage configuration. Two forms accepted:

  • list of str — sample names that should store strand arrays using "U" (raw alignment orientation; useful when read1/read2 correction is not needed, e.g. some ATAC-seq applications).
  • dict — maps each stranded sample name to its library type: "R" (ISR / dUTP / TruSeq Stranded mRNA — read1 from reverse strand), "F" (ISF / ligation — read1 from forward strand), or "U". Use a dict for RNA-seq so that read1/read2 orientation is taken into account when assigning reads to the forward or reverse strand.

Example::

stranded={"rna-rep1": "R", "rna-rep2": "R"}
None
overwrite bool

Overwrite existing sub-stores.

True
resume bool

Resume processing an existing sub-store.

False
max_workers int

Parallel threads for processing chromosomes within each sample. Samples are processed sequentially to optimize memory usage.

1
chunk_len int

Zarr chunk size for the position dimension (coverage store).

65536
construction_compression ('default', 'fast', 'none')

Build-time compression profile for the coverage store.

"default"
local_staging bool

Build the coverage store under local scratch before publishing.

False
staging_dir str or Path

Scratch directory for local staging.

None
test bool

Restrict coverage to chr21/chr22/chrY (for testing).

False
log_file Path

Path to write BAM processing logs.

None

Returns:

Type Description
QuantNado
Source code in quantnado/api.py
@classmethod
def create_dataset(
    cls,
    store_dir: str | Path,
    # Input files
    bam_files: list[str | Path] | None = None,
    methylation_files: list[str | Path] | None = None,
    vcf_files: list[str | Path] | None = None,
    # Reference / metadata
    chromsizes: str | Path | dict[str, int] | None = None,
    metadata: pd.DataFrame | Path | str | None = None,
    *,
    sample_column: str = "sample_id",
    # Sample naming
    bam_sample_names: list[str] | callable | None = None,
    methylation_sample_names: list[str] | None = None,
    vcf_sample_names: list[str] | callable | None = None,
    # Coverage options
    filter_chromosomes: bool = True,
    stranded: list[str] | dict[str, str] | None = None,
    # Process control
    overwrite: bool = True,
    resume: bool = False,
    max_workers: int = 1,
    # Store format
    chunk_len: int = DEFAULT_CHUNK_LEN,
    construction_compression: str = "default",
    # Staging
    local_staging: bool = False,
    staging_dir: str | Path | None = None,
    # Misc
    test: bool = False,
    log_file: Path | None = None,
) -> "QuantNado":
    """
    Create a new QuantNado dataset from genomic files.

    At least one of ``bam_files``, ``methylation_files``, or ``vcf_files``
    must be provided.

    Parameters
    ----------
    store_dir : str or Path
        Output directory. Created if it does not exist.
    bam_files : list of Path, optional
        BAM files for per-base coverage storage.
    methylation_files : list of Path, optional
        Methylation files of any supported type. File type is detected from
        the filename: ``*num_mc_cxreport*`` → mC, ``*num_hmc_cxreport*`` →
        hmC, ``*CXreport*`` → evoC whole-molecule, everything else →
        MethylDackel bedGraph.
    vcf_files : list of Path, optional
        VCF.gz files (one per sample) for variant storage.
    chromsizes : str, Path, or dict, optional
        Chromosome sizes. Extracted from the first BAM if not provided.
    metadata : DataFrame, Path, or str, optional
        Sample metadata CSV attached to all sub-stores.
    sample_column : str, default "sample_id"
        Column in ``metadata`` matching sample names.
    bam_sample_names : list of str or callable, optional
        Override sample names for BAM files. A callable receives each
        ``Path`` and returns a ``str`` (e.g. ``lambda p: p.stem``).
    methylation_sample_names : list of str, optional
        Override sample names for methylation files, in classification
        order: bedGraph names first, then CXreport, then mC/hmC (one name
        per sample pair).
    vcf_sample_names : list of str or callable, optional
        Override sample names for VCF files. A callable receives each
        ``Path`` and returns a ``str``.
    filter_chromosomes : bool, default True
        Keep only canonical chromosomes (``chr*`` without underscores).
    stranded : list of str or dict, optional
        Strand-specific coverage configuration. Two forms accepted:

        * **list of str** — sample names that should store strand arrays using
          ``"U"`` (raw alignment orientation; useful when read1/read2
          correction is not needed, e.g. some ATAC-seq applications).
        * **dict** — maps each stranded sample name to its library type:
          ``"R"`` (ISR / dUTP / TruSeq Stranded mRNA — read1 from reverse strand),
          ``"F"`` (ISF / ligation — read1 from forward strand), or ``"U"``.
          Use a dict for RNA-seq so that read1/read2 orientation is taken into
          account when assigning reads to the forward or reverse strand.

        Example::

            stranded={"rna-rep1": "R", "rna-rep2": "R"}

    overwrite : bool, default True
        Overwrite existing sub-stores.
    resume : bool, default False
        Resume processing an existing sub-store.
    max_workers : int, default 1
        Parallel threads for processing chromosomes within each sample.
        Samples are processed sequentially to optimize memory usage.
    chunk_len : int, default 65536
        Zarr chunk size for the position dimension (coverage store).
    construction_compression : {"default", "fast", "none"}, default "default"
        Build-time compression profile for the coverage store.
    local_staging : bool, default False
        Build the coverage store under local scratch before publishing.
    staging_dir : str or Path, optional
        Scratch directory for local staging.
    test : bool, default False
        Restrict coverage to chr21/chr22/chrY (for testing).
    log_file : Path, optional
        Path to write BAM processing logs.

    Returns
    -------
    QuantNado
    """

    # Classify methylation files by filename pattern, tracking names alongside files
    # so name assignment follows type-based reordering rather than alphabetical position.
    _meth_files = [str(f) for f in (methylation_files or [])]
    _meth_names = (
        list(methylation_sample_names)
        if methylation_sample_names
        else [None] * len(_meth_files)
    )
    methyldackel_files, methyldackel_names = [], []
    cxreport_files, cxreport_names = [], []
    mc_files, mc_names = [], []
    hmc_files, hmc_names = [], []
    for _f, _n in zip(_meth_files, _meth_names):
        _fname = Path(_f).name
        if "num_hmc_cxreport" in _fname:
            hmc_files.append(_f)
            hmc_names.append(_n)
        elif "num_mc_cxreport" in _fname:
            mc_files.append(_f)
            mc_names.append(_n)
        elif "CXreport" in _fname:
            cxreport_files.append(_f)
            cxreport_names.append(_n)
        else:
            methyldackel_files.append(_f)
            methyldackel_names.append(_n)
    methyldackel_sample_names = [
        n or Path(f).stem for n, f in zip(methyldackel_names, methyldackel_files)
    ] or None
    cxreport_sample_names = [
        n or Path(f).name.split(".")[0] for n, f in zip(cxreport_names, cxreport_files)
    ] or None
    _cx_names = mc_names or hmc_names
    _cx_files = mc_files or hmc_files
    mc_hmc_sample_names = [
        n or Path(f).name.split(".")[0] for n, f in zip(_cx_names, _cx_files)
    ] or None

    if callable(bam_sample_names) and bam_files is not None:
        bam_sample_names = [bam_sample_names(Path(f)) for f in bam_files]
    if callable(vcf_sample_names) and vcf_files is not None:
        vcf_sample_names = [vcf_sample_names(Path(f)) for f in vcf_files]

    # Pre-flight validation: check files and names before any work starts
    def _require_file(p: str | Path, label: str) -> Path:
        p = Path(p)
        if not p.exists():
            raise FileNotFoundError(f"{label}: file not found: {p}")
        if p.stat().st_size == 0:
            raise ValueError(f"{label}: file is empty: {p}")
        return p

    if bam_files:
        for f in bam_files:
            p = _require_file(f, "BAM")
            if p.suffix.lower() != ".bam":
                raise ValueError(f"BAM: expected .bam extension, got: {p.name}")
            if not (
                p.with_suffix(".bam.bai").exists()
                or Path(str(p) + ".bai").exists()
                or Path(str(p) + ".csi").exists()
            ):
                raise FileNotFoundError(f"BAM: missing index (.bai/.csi) for: {p}")

    for f in methyldackel_files + cxreport_files + mc_files + hmc_files:
        _require_file(f, "methylation")

    if methylation_sample_names and len(methylation_sample_names) != len(_meth_files):
        raise ValueError(
            f"methylation_sample_names length ({len(methylation_sample_names)}) "
            f"!= methylation_files length ({len(_meth_files)})"
        )

    if vcf_files:
        for f in vcf_files:
            p = _require_file(f, "VCF")
            name = p.name.lower()
            if not name.endswith(".vcf.gz"):
                raise ValueError(f"VCF: expected .vcf.gz file, got: {p.name}")
            if not (Path(str(p) + ".tbi").exists() or Path(str(p) + ".csi").exists()):
                raise FileNotFoundError(f"VCF: missing tabix index (.tbi/.csi) for: {p}")

    def _check_dupes(names: list[str], label: str) -> None:
        seen, dupes = set(), set()
        for n in names:
            (dupes if n in seen else seen).add(n)
        if dupes:
            raise ValueError(f"Duplicate sample names in {label}: {sorted(dupes)}")

    if bam_files:
        _bam_names = bam_sample_names or [Path(f).stem for f in bam_files]
        if len(_bam_names) != len(bam_files):
            raise ValueError(
                f"bam_sample_names length ({len(_bam_names)}) != bam_files length ({len(bam_files)})"
            )
        _check_dupes(_bam_names, "bam_sample_names")

    if methyldackel_files or cxreport_files or mc_files or hmc_files:
        _bg = methyldackel_sample_names or [Path(f).stem for f in methyldackel_files]
        _cx = cxreport_sample_names or [Path(f).name.split(".")[0] for f in cxreport_files]
        _mchmc = mc_hmc_sample_names or [Path(f).name.split(".")[0] for f in (mc_files or hmc_files)]
        _check_dupes(_bg, "methylation bedGraph sample names")
        _check_dupes(_cx, "methylation CXreport sample names")
        _check_dupes(_mchmc, "methylation mc/hmc sample names")
        # Cross-category duplicate check with per-name diagnostics
        _tagged: list[tuple[str, str, str]] = (  # (name, category, file)
            [(n, "bedGraph", f) for n, f in zip(_bg, methyldackel_files)]
            + [(n, "CXreport", f) for n, f in zip(_cx, cxreport_files)]
            + [(n, "mc/hmc", f) for n, f in zip(_mchmc, (mc_files or hmc_files))]
        )
        _seen: dict[str, tuple[str, str]] = {}
        _cross_dupes: dict[str, list[str]] = {}
        for _name, _cat, _file in _tagged:
            if _name in _seen:
                _cross_dupes.setdefault(_name, [
                    f"{_seen[_name][0]}: {Path(_seen[_name][1]).name}"
                ]).append(f"{_cat}: {Path(_file).name}")
            else:
                _seen[_name] = (_cat, _file)
        if _cross_dupes:
            _lines = "\n".join(
                f"  '{n}':\n" + "\n".join(f"    - {loc}" for loc in locs)
                for n, locs in sorted(_cross_dupes.items())
            )
            raise ValueError(
                f"Duplicate methylation sample names across file types:\n{_lines}\n"
                "Check that each sample appears in only one methylation file type, "
                "or supply explicit methylation_sample_names to disambiguate."
            )

    if vcf_files:
        _vcf_names = vcf_sample_names or [Path(f).name.split(".")[0] for f in vcf_files]
        if len(_vcf_names) != len(vcf_files):
            raise ValueError(
                f"vcf_sample_names length ({len(_vcf_names)}) != vcf_files length ({len(vcf_files)})"
            )
        _check_dupes(_vcf_names, "vcf_sample_names")

    # Upfront summary of all work to be done, grouped by sample
    sample_modalities: dict[str, list[str]] = {}

    def _register(names, modality):
        for n in names:
            sample_modalities.setdefault(n, []).append(modality)

    if bam_files:
        _bam_names = bam_sample_names or [Path(f).stem for f in bam_files]
        _stranded_set = set(stranded.keys() if isinstance(stranded, dict) else (stranded or []))
        for n in _bam_names:
            _register([n], "stranded_coverage" if n in _stranded_set else "coverage")
    if methyldackel_files and (mc_files or hmc_files):
        _register(
            (methyldackel_sample_names or [Path(f).stem for f in methyldackel_files])
            + (mc_hmc_sample_names or [Path(f).stem for f in (mc_files or hmc_files)]),
            "methylation",
        )
    elif methyldackel_files:
        _register(
            methyldackel_sample_names or [Path(f).stem for f in methyldackel_files],
            "methylation",
        )
    elif cxreport_files:
        _register(
            cxreport_sample_names or [Path(f).stem for f in cxreport_files], "methylation"
        )
    elif mc_files or hmc_files:
        _register(
            mc_hmc_sample_names or [Path(f).stem for f in (mc_files or hmc_files)],
            "methylation",
        )
    if vcf_files:
        _register(vcf_sample_names or [Path(f).stem for f in vcf_files], "variants")

    col_width = max(len(s) for s in sample_modalities) if sample_modalities else 0
    summary_lines = [f"Creating QuantNado dataset at '{store_dir}'"]
    for sample, mods in sample_modalities.items():
        summary_lines.append(f"  {sample:<{col_width}}  [{', '.join(mods)}]")
    logger.info("\n".join(summary_lines))

    ms = MultiomicsStore.from_files(
        store_dir=store_dir,
        bam_files=bam_files,
        methyldackel_files=methyldackel_files,
        cxreport_files=cxreport_files,
        mc_files=mc_files,
        hmc_files=hmc_files,
        vcf_files=vcf_files,
        chromsizes=chromsizes,
        metadata=metadata,
        bam_sample_names=bam_sample_names,
        methyldackel_sample_names=methyldackel_sample_names,
        cxreport_sample_names=cxreport_sample_names,
        mc_hmc_sample_names=mc_hmc_sample_names,
        vcf_sample_names=vcf_sample_names,
        filter_chromosomes=filter_chromosomes,
        overwrite=overwrite,
        resume=resume,
        sample_column=sample_column,
        chunk_len=chunk_len,
        construction_compression=construction_compression,
        local_staging=local_staging,
        staging_dir=staging_dir,
        log_file=log_file,
        max_workers=max_workers,
        test=test,
        stranded=stranded,
    )
    return cls(ms)

get_metadata

get_metadata() -> pd.DataFrame

Return metadata as a DataFrame.

If this is a multiomics store, returns combined metadata across all modalities with a modalities column. Otherwise returns coverage store metadata.

Source code in quantnado/api.py
def get_metadata(self) -> pd.DataFrame:
    """
    Return metadata as a DataFrame.

    If this is a multiomics store, returns combined metadata across all
    modalities with a ``modalities`` column. Otherwise returns coverage
    store metadata.
    """
    if self._multiomics is not None:
        return self._multiomics.get_metadata()
    return self._require_coverage().get_metadata()

to_xarray

to_xarray(
    chromosomes: list[str] | None = None,
    chunks: str | dict | None = None,
) -> dict[str, xr.DataArray]

Extract the coverage dataset as per-chromosome Xarray DataArrays.

Parameters:

Name Type Description Default
chromosomes list of str

Chromosomes to extract. If None, extracts all.

None
chunks str or dict

Dask chunking strategy.

None

Returns:

Type Description
dict[str, DataArray]
Source code in quantnado/api.py
def to_xarray(
    self,
    chromosomes: list[str] | None = None,
    chunks: str | dict | None = None,
) -> dict[str, xr.DataArray]:
    """
    Extract the coverage dataset as per-chromosome Xarray DataArrays.

    Parameters
    ----------
    chromosomes : list of str, optional
        Chromosomes to extract. If None, extracts all.
    chunks : str or dict, optional
        Dask chunking strategy.

    Returns
    -------
    dict[str, DataArray]
    """
    return self._require_coverage().to_xarray(chromosomes=chromosomes, chunks=chunks)

extract_region

extract_region(
    region: str | None = None,
    chrom: str | None = None,
    start: int | None = None,
    end: int | None = None,
    samples: list[str] | list[int] | None = None,
    as_xarray: bool = True,
    normalise: str | None = None,
    normalize: str | None = None,
    library_sizes: Series | dict | None = None,
) -> xr.DataArray | Any

Extract coverage signal for a specific genomic region.

Parameters:

Name Type Description Default
region str

Region string e.g. "chr1:1000-5000".

None
chrom optional

Alternative to region.

None
start optional

Alternative to region.

None
end optional

Alternative to region.

None
samples list of str or int

Sample names or indices. If None, uses all completed samples.

None
as_xarray bool

Return DataArray; if False return numpy array.

True
normalise ('cpm', 'rpkm')

Normalise the extracted signal before returning it. If omitted, raw coverage is returned.

"cpm"
normalize ('cpm', 'rpkm')

American-English alias for normalise.

"cpm"
library_sizes Series or dict

Total mapped reads per sample, indexed by sample name. Overrides automatic lookup from the store when normalise is used.

None

Returns:

Type Description
DataArray or ndarray
Source code in quantnado/api.py
def extract_region(
    self,
    region: str | None = None,
    chrom: str | None = None,
    start: int | None = None,
    end: int | None = None,
    samples: list[str] | list[int] | None = None,
    as_xarray: bool = True,
    normalise: str | None = None,
    normalize: str | None = None,
    library_sizes: pd.Series | dict | None = None,
) -> xr.DataArray | Any:
    """
    Extract coverage signal for a specific genomic region.

    Parameters
    ----------
    region : str, optional
        Region string e.g. ``"chr1:1000-5000"``.
    chrom, start, end : optional
        Alternative to ``region``.
    samples : list of str or int, optional
        Sample names or indices. If None, uses all completed samples.
    as_xarray : bool, default True
        Return DataArray; if False return numpy array.
    normalise : {"cpm", "rpkm"}, optional
        Normalise the extracted signal before returning it. If omitted,
        raw coverage is returned.
    normalize : {"cpm", "rpkm"}, optional
        American-English alias for ``normalise``.
    library_sizes : pd.Series or dict, optional
        Total mapped reads per sample, indexed by sample name. Overrides
        automatic lookup from the store when ``normalise`` is used.

    Returns
    -------
    DataArray or ndarray
    """
    return self._require_coverage().extract_region(
        region=region,
        chrom=chrom,
        start=start,
        end=end,
        samples=samples,
        as_xarray=as_xarray,
        normalise=normalise,
        normalize=normalize,
        library_sizes=library_sizes,
    )

reduce

reduce(
    intervals_path: str | Path | None = None,
    ranges_df: Any | None = None,
    feature_type: FeatureType | str | None = None,
    gtf_path: str
    | Path
    | Iterable[str | Path]
    | None = None,
    reduction: ReductionMethod | str = ReductionMethod.MEAN,
    filter_incomplete: bool = True,
) -> xr.Dataset

Reduce per-sample coverage signal over genomic ranges.

Parameters:

Name Type Description Default
intervals_path str or Path

Path to BED or GTF file.

None
ranges_df DataFrame or PyRanges

Pre-parsed genomic ranges.

None
feature_type FeatureType or str

Predefined feature type to extract from GTF.

None
gtf_path str, Path, or Iterable

Path(s) to GTF file(s).

None
reduction ReductionMethod or str

Aggregation statistic: 'mean', 'sum', 'max', 'min', 'median'.

"mean"
filter_incomplete bool

Exclude samples not yet marked complete.

True

Returns:

Type Description
Dataset

Dimensions: (ranges, sample).

Source code in quantnado/api.py
def reduce(
    self,
    intervals_path: str | Path | None = None,
    ranges_df: Any | None = None,
    feature_type: FeatureType | str | None = None,
    gtf_path: str | Path | Iterable[str | Path] | None = None,
    reduction: ReductionMethod | str = ReductionMethod.MEAN,
    filter_incomplete: bool = True,
) -> xr.Dataset:
    """
    Reduce per-sample coverage signal over genomic ranges.

    Parameters
    ----------
    intervals_path : str or Path, optional
        Path to BED or GTF file.
    ranges_df : DataFrame or PyRanges, optional
        Pre-parsed genomic ranges.
    feature_type : FeatureType or str, optional
        Predefined feature type to extract from GTF.
    gtf_path : str, Path, or Iterable, optional
        Path(s) to GTF file(s).
    reduction : ReductionMethod or str, default "mean"
        Aggregation statistic: 'mean', 'sum', 'max', 'min', 'median'.
    filter_incomplete : bool, default True
        Exclude samples not yet marked complete.

    Returns
    -------
    Dataset
        Dimensions: (ranges, sample).
    """
    return reduce_byranges_signal(
        self._require_coverage(),
        ranges_df=ranges_df,
        intervals_path=intervals_path,
        feature_type=feature_type,
        gtf_path=gtf_path,
        reduction=reduction,
        include_incomplete=not filter_incomplete,
    )

extract

extract(
    intervals_path: str | Path | None = None,
    ranges_df: Any | None = None,
    feature_type: FeatureType | str | None = None,
    gtf_path: str
    | Path
    | Iterable[str | Path]
    | None = None,
    fixed_width: int | None = None,
    upstream: int | None = None,
    downstream: int | None = None,
    anchor: AnchorPoint | str = AnchorPoint.MIDPOINT,
    bin_size: int | None = None,
    bin_agg: ReductionMethod | str = ReductionMethod.MEAN,
    filter_incomplete: bool = True,
    modality: str | None = None,
    variable: str | None = None,
    samples: list[str] | None = None,
    strand_aware: bool = False,
    strand: str | None = None,
    max_workers: int = 1,
) -> xr.DataArray

Extract signal over genomic ranges.

Routes to the appropriate modality sub-store based on modality.

Parameters:

Name Type Description Default
intervals_path str or Path

Path to BED or GTF file.

None
ranges_df DataFrame or PyRanges

Pre-parsed genomic ranges.

None
feature_type FeatureType or str

Predefined feature type to extract from GTF.

None
gtf_path str, Path, or Iterable

Path(s) to GTF file(s).

None
fixed_width int

Symmetric window total width centered on anchor.

None
upstream int

Bases upstream of anchor (cannot combine with fixed_width).

None
downstream int

Bases downstream of anchor (cannot combine with fixed_width).

None
anchor AnchorPoint or str

Anchor point: 'midpoint', 'start', or 'end'.

"midpoint"
bin_size int

Aggregate positions into bins of this size.

None
bin_agg ReductionMethod or str

Aggregation method for binning (coverage only).

"mean"
filter_incomplete bool

Exclude samples not yet marked complete (coverage only).

True
modality ('coverage', 'methylation')

Which sub-store to extract from. Defaults to "coverage".

"coverage"
variable str

For methylation: which variable to extract ("methylation_pct", "n_methylated", "n_unmethylated"). Defaults to "methylation_pct".

None
samples list of str

Subset of sample names. Works for both coverage and methylation.

None
strand_aware bool

Per-interval strand routing: "+" features use _fwd, "-" features use _rev. Coverage only; ignored for methylation.

False
strand ('+', '-')

Force all intervals to use the forward ("+"{chrom}_fwd) or reverse ("-"{chrom}_rev) strand array, regardless of each interval's strand annotation. Use this to obtain separate fwd/rev DataArrays for 🇵🇾meth:metaplot data_rev or 🇵🇾meth:tornadoplot data_rev. Coverage only; ignored for methylation.

"+"
max_workers int

Number of chromosome groups to extract in parallel for coverage data.

1

Returns:

Type Description
DataArray

Dimensions: (interval, relative_position|bin, sample).

Source code in quantnado/api.py
def extract(
    self,
    intervals_path: str | Path | None = None,
    ranges_df: Any | None = None,
    feature_type: FeatureType | str | None = None,
    gtf_path: str | Path | Iterable[str | Path] | None = None,
    fixed_width: int | None = None,
    upstream: int | None = None,
    downstream: int | None = None,
    anchor: AnchorPoint | str = AnchorPoint.MIDPOINT,
    bin_size: int | None = None,
    bin_agg: ReductionMethod | str = ReductionMethod.MEAN,
    filter_incomplete: bool = True,
    modality: str | None = None,
    variable: str | None = None,
    samples: list[str] | None = None,
    strand_aware: bool = False,
    strand: str | None = None,
    max_workers: int = 1,
) -> xr.DataArray:
    """
    Extract signal over genomic ranges.

    Routes to the appropriate modality sub-store based on ``modality``.

    Parameters
    ----------
    intervals_path : str or Path, optional
        Path to BED or GTF file.
    ranges_df : DataFrame or PyRanges, optional
        Pre-parsed genomic ranges.
    feature_type : FeatureType or str, optional
        Predefined feature type to extract from GTF.
    gtf_path : str, Path, or Iterable, optional
        Path(s) to GTF file(s).
    fixed_width : int, optional
        Symmetric window total width centered on anchor.
    upstream : int, optional
        Bases upstream of anchor (cannot combine with ``fixed_width``).
    downstream : int, optional
        Bases downstream of anchor (cannot combine with ``fixed_width``).
    anchor : AnchorPoint or str, default "midpoint"
        Anchor point: 'midpoint', 'start', or 'end'.
    bin_size : int, optional
        Aggregate positions into bins of this size.
    bin_agg : ReductionMethod or str, default "mean"
        Aggregation method for binning (coverage only).
    filter_incomplete : bool, default True
        Exclude samples not yet marked complete (coverage only).
    modality : {"coverage", "methylation"}, optional
        Which sub-store to extract from. Defaults to "coverage".
    variable : str, optional
        For methylation: which variable to extract
        (``"methylation_pct"``, ``"n_methylated"``, ``"n_unmethylated"``).
        Defaults to ``"methylation_pct"``.
    samples : list of str, optional
        Subset of sample names.  Works for both coverage and methylation.
    strand_aware : bool, default False
        Per-interval strand routing: ``"+"`` features use ``_fwd``, ``"-"`` features
        use ``_rev``.  Coverage only; ignored for methylation.
    strand : {"+"  , "-"}, optional
        Force all intervals to use the forward (``"+"`` → ``{chrom}_fwd``) or reverse
        (``"-"`` → ``{chrom}_rev``) strand array, regardless of each interval's strand
        annotation.  Use this to obtain separate fwd/rev DataArrays for
        :py:meth:`metaplot` ``data_rev`` or :py:meth:`tornadoplot` ``data_rev``.
        Coverage only; ignored for methylation.
    max_workers : int, default 1
        Number of chromosome groups to extract in parallel for coverage data.

    Returns
    -------
    DataArray
        Dimensions: (interval, relative_position|bin, sample).
    """
    if modality == "methylation":
        if self.methylation is None:
            raise RuntimeError("No methylation store available.")
        return self.methylation.extract(
            intervals_path=intervals_path,
            ranges_df=ranges_df,
            feature_type=feature_type,
            gtf_path=gtf_path,
            variable=variable or "methylation_pct",
            upstream=upstream,
            downstream=downstream,
            fixed_width=fixed_width,
            anchor=anchor if isinstance(anchor, str) else anchor.value,
            bin_size=bin_size if bin_size is not None else 50,
            samples=samples,
        )

    _coverage = self._require_coverage()
    _sample_indices = None
    if samples is not None:
        _all = getattr(_coverage, "sample_names", None) or []
        _sample_indices = [_all.index(s) for s in samples if s in _all]

    return extract_byranges_signal(
        _coverage,
        ranges_df=ranges_df,
        intervals_path=intervals_path,
        feature_type=feature_type,
        gtf_path=gtf_path,
        fixed_width=fixed_width,
        upstream=upstream,
        downstream=downstream,
        anchor=anchor,
        bin_size=bin_size,
        bin_agg=bin_agg,
        include_incomplete=not filter_incomplete,
        sample_indices=_sample_indices,
        strand_aware=strand_aware,
        force_strand=strand,
        max_workers=max_workers,
    )

count_features

count_features(
    gtf_file: str | Path | None = None,
    bed_file: str | Path | None = None,
    ranges: Any | None = None,
    feature_type: FeatureType | str = FeatureType.GENE,
    feature_id_col: str | list[str] | None = None,
    aggregation: str | None = None,
    strand: str | None = None,
    assays: list[str] | None = None,
    samples: list[str] | None = None,
    filter_chromosomes: bool = True,
    integerize: bool = False,
    fillna_value: float | int | None = 0,
    min_count: int = 1,
    filter_zero: bool = False,
    include_incomplete: bool = False,
) -> tuple[pd.DataFrame, pd.DataFrame]

Generate feature count matrix compatible with DESeq2.

Parameters:

Name Type Description Default
gtf_file str or Path

Path to GTF file.

None
bed_file str or Path

Path to BED file.

None
ranges DataFrame or PyRanges

Pre-parsed genomic ranges.

None
feature_type FeatureType or str

GTF feature type to extract.

"gene"
feature_id_col str or list of str

Column(s) to use as feature identifiers.

None
aggregation str

Column to aggregate sub-features by.

None
strand str or int

Feature strand filtering or read counting mode:

  • "+", "-": filter features by strand annotation before counting
  • 0: unstranded (default)
  • 1 (fr-secondstrand): + features counted from _fwd, - from _rev
  • 2 (fr-firststrand/dUTP): + features counted from _rev, - from _fwd

Int modes require a store built with stranded and a strand column in the GTF.

None
assays list of str

Which assays to include in output.

None
samples list of str

Specific sample names to include in counts (e.g., ['RNA-SEM-1', 'RNA-SEM-2']). If provided, only these samples are processed and returned.

None
filter_chromosomes bool

Keep only canonical chromosomes (chr* without underscores).

True
integerize bool

Round counts to nearest integer for DESeq2.

False
fillna_value float or int or None

Value to fill NaNs before integerization.

0
min_count int

Minimum count threshold for mean masking.

1
filter_zero bool

Remove features with zero counts across all samples.

False
include_incomplete bool

Include samples not yet marked complete.

False

Returns:

Name Type Description
counts DataFrame

Count matrix (features × samples).

features DataFrame

Feature metadata.

Source code in quantnado/api.py
def count_features(
    self,
    gtf_file: str | Path | None = None,
    bed_file: str | Path | None = None,
    ranges: Any | None = None,
    feature_type: FeatureType | str = FeatureType.GENE,
    feature_id_col: str | list[str] | None = None,
    aggregation: str | None = None,
    strand: str | None = None,
    assays: list[str] | None = None,
    samples: list[str] | None = None,
    filter_chromosomes: bool = True,
    integerize: bool = False,
    fillna_value: float | int | None = 0,
    min_count: int = 1,
    filter_zero: bool = False,
    include_incomplete: bool = False,
) -> tuple[pd.DataFrame, pd.DataFrame]:
    """
    Generate feature count matrix compatible with DESeq2.

    Parameters
    ----------
    gtf_file : str or Path, optional
        Path to GTF file.
    bed_file : str or Path, optional
        Path to BED file.
    ranges : DataFrame or PyRanges, optional
        Pre-parsed genomic ranges.
    feature_type : FeatureType or str, default "gene"
        GTF feature type to extract.
    feature_id_col : str or list of str, optional
        Column(s) to use as feature identifiers.
    aggregation : str, optional
        Column to aggregate sub-features by.
    strand : str or int, optional
        Feature strand filtering or read counting mode:

        - ``"+"``, ``"-"``: filter features by strand annotation before counting
        - ``0``: unstranded (default)
        - ``1`` (fr-secondstrand): ``+`` features counted from ``_fwd``, ``-`` from ``_rev``
        - ``2`` (fr-firststrand/dUTP): ``+`` features counted from ``_rev``, ``-`` from ``_fwd``

        Int modes require a store built with ``stranded`` and a ``strand`` column in the GTF.
    assays : list of str, optional
        Which assays to include in output.
    samples : list of str, optional
        Specific sample names to include in counts (e.g., ['RNA-SEM-1', 'RNA-SEM-2']).
        If provided, only these samples are processed and returned.
    filter_chromosomes : bool, default True
        Keep only canonical chromosomes (``chr*`` without underscores).
    integerize : bool, default False
        Round counts to nearest integer for DESeq2.
    fillna_value : float or int or None, default 0
        Value to fill NaNs before integerization.
    min_count : int, default 1
        Minimum count threshold for mean masking.
    filter_zero : bool, default False
        Remove features with zero counts across all samples.
    include_incomplete : bool, default False
        Include samples not yet marked complete.

    Returns
    -------
    counts : DataFrame
        Count matrix (features × samples).
    features : DataFrame
        Feature metadata.
    """
    return _feature_counts(
        self._require_coverage(),
        gtf_file=gtf_file,
        bed_file=bed_file,
        ranges_df=ranges,
        feature_type=feature_type,
        feature_id_col=feature_id_col,
        aggregate_by=aggregation,
        strand=strand,
        assay=assays[0] if assays else None,
        samples=samples,
        filter_chromosomes=filter_chromosomes,
        integerize=integerize,
        fillna_value=fillna_value,
        min_count=min_count,
        filter_zero=filter_zero,
        include_incomplete=include_incomplete,
    )

normalise

normalise(
    data: "xr.Dataset | xr.DataArray | pd.DataFrame",
    *,
    method: str = "cpm",
    library_sizes: "pd.Series | dict | None" = None,
    feature_lengths: "pd.Series | Any | None" = None,
) -> "xr.Dataset | xr.DataArray | pd.DataFrame"

Normalise coverage signal or feature counts.

Parameters:

Name Type Description Default
data Dataset | DataArray | DataFrame

Output of :meth:reduce, :meth:extract, or :meth:count_features.

required
method ('cpm', 'rpkm', 'tpm')

Normalisation method:

  • "cpm" — Counts Per Million (all data types)
  • "rpkm" — RPKM (xr.Dataset and pd.DataFrame; needs feature_lengths)
  • "tpm" — TPM (pd.DataFrame only; no library sizes required)
"cpm"
library_sizes Series or dict

Total mapped reads per sample, indexed by sample name. Auto-read from the store when omitted (requires store built with current QuantNado version).

None
feature_lengths Series or array - like

Feature lengths in base-pairs, aligned to data rows. Required for "rpkm" / "tpm" on DataFrames. Inferred from range_length coord automatically for xr.Dataset.

None

Returns:

Type Description
Same type as ``data``, normalised values.

Examples:

>>> cpm_signal = ds.normalise(ds.reduce(intervals_path="promoters.bed"))
>>> cpm_binned = ds.normalise(binned, method="cpm")
>>> counts, features = ds.count_features(gtf_file="genes.gtf")
>>> tpm = ds.normalise(counts, method="tpm", feature_lengths=features["range_length"])
Source code in quantnado/api.py
def normalise(
    self,
    data: "xr.Dataset | xr.DataArray | pd.DataFrame",
    *,
    method: str = "cpm",
    library_sizes: "pd.Series | dict | None" = None,
    feature_lengths: "pd.Series | Any | None" = None,
) -> "xr.Dataset | xr.DataArray | pd.DataFrame":
    """
    Normalise coverage signal or feature counts.

    Parameters
    ----------
    data : xr.Dataset | xr.DataArray | pd.DataFrame
        Output of :meth:`reduce`, :meth:`extract`, or :meth:`count_features`.
    method : {"cpm", "rpkm", "tpm"}, default "cpm"
        Normalisation method:

        - ``"cpm"``  — Counts Per Million (all data types)
        - ``"rpkm"`` — RPKM (xr.Dataset and pd.DataFrame; needs feature_lengths)
        - ``"tpm"``  — TPM (pd.DataFrame only; no library sizes required)
    library_sizes : pd.Series or dict, optional
        Total mapped reads per sample, indexed by sample name.
        Auto-read from the store when omitted (requires store built with
        current QuantNado version).
    feature_lengths : pd.Series or array-like, optional
        Feature lengths in base-pairs, aligned to data rows.
        Required for ``"rpkm"`` / ``"tpm"`` on DataFrames.
        Inferred from ``range_length`` coord automatically for xr.Dataset.

    Returns
    -------
    Same type as ``data``, normalised values.

    Examples
    --------
    >>> cpm_signal = ds.normalise(ds.reduce(intervals_path="promoters.bed"))
    >>> cpm_binned = ds.normalise(binned, method="cpm")
    >>> counts, features = ds.count_features(gtf_file="genes.gtf")
    >>> tpm = ds.normalise(counts, method="tpm", feature_lengths=features["range_length"])
    """
    return _normalise(
        data,
        self,
        method=method,
        library_sizes=library_sizes,
        feature_lengths=feature_lengths,
    )

pca

pca(
    data: DataArray,
    n_components: int = 10,
    chromosome: str | None = None,
    nan_handling_strategy: str = "drop",
    standardize: bool = False,
    random_state: int | None = None,
    subset_size: int | None = None,
    subset_strategy: str = "random",
) -> tuple[Any, xr.DataArray]

Run PCA on reduced genomic signal data.

Parameters:

Name Type Description Default
data DataArray

Input data with dimensions (feature, sample). Typically output from .reduce() (e.g. reduced["mean"]).

required
n_components int

Number of principal components.

10
chromosome str

If set, restrict to features from this chromosome (requires "chrom" coordinate).

None
nan_handling_strategy str

How to handle NaN values: "drop", "set_to_zero", or "mean_value_imputation".

"drop"
standardize bool

Whether to standardize features to zero mean and unit variance before PCA.

False
random_state int or None

Random state for PCA reproducibility.

None
subset_size int or None

If set, randomly subset to this many features before PCA (for speed).

None
subset_strategy str

Strategy for subsetting features: "random" or "top_variance".

"random"

Returns:

Name Type Description
pca_obj PCA
transformed DataArray

Dimensions: (sample, component).

Source code in quantnado/api.py
def pca(
    self,
    data: xr.DataArray,
    n_components: int = 10,
    chromosome: str | None = None,
    nan_handling_strategy: str = "drop",
    standardize: bool = False,
    random_state: int | None = None,
    subset_size: int | None = None,
    subset_strategy: str = "random",
) -> tuple[Any, xr.DataArray]:
    """
    Run PCA on reduced genomic signal data.

    Parameters
    ----------
    data : DataArray
        Input data with dimensions (feature, sample). Typically output
        from ``.reduce()`` (e.g. ``reduced["mean"]``).
    n_components : int, default 10
        Number of principal components.
    chromosome : str, optional
        If set, restrict to features from this chromosome (requires "chrom" coordinate).
    nan_handling_strategy : str, default "drop"
        How to handle NaN values: "drop", "set_to_zero", or
        "mean_value_imputation".
    standardize : bool, default False
        Whether to standardize features to zero mean and unit variance before PCA.
    random_state : int or None, default None
        Random state for PCA reproducibility.
    subset_size : int or None, default None
        If set, randomly subset to this many features before PCA (for speed).
    subset_strategy : str, default "random"
        Strategy for subsetting features: "random" or "top_variance".

    Returns
    -------
    pca_obj : sklearn.decomposition.PCA
    transformed : DataArray
        Dimensions: (sample, component).
    """
    return _run_pca(
        data,
        n_components=n_components,
        chromosome=chromosome,
        nan_handling_strategy=nan_handling_strategy,
        standardize=standardize,
        random_state=random_state,
        subset_size=subset_size,
        subset_strategy=subset_strategy,
    )

metaplot

metaplot(
    data: DataArray,
    data_rev: DataArray | None = None,
    *,
    modality: str | None = None,
    samples: list[str] | None = None,
    groups: dict[str, list[str]] | None = None,
    flip_minus_strand: bool = True,
    error_stat: str | None = "sem",
    palette: str | list | dict | None = None,
    reference_point: float | None = 0,
    reference_label: str = "TSS",
    xlabel: str = "Relative position",
    ylabel: str | None = None,
    title: str = "Metagene profile",
    figsize: tuple[float, float] = (8, 4),
    ax: Any = None,
    filepath: str | Path | None = None,
) -> Any

Plot a metagene profile from the output of .extract().

Parameters:

Name Type Description Default
data DataArray

Output of .extract() with dims (interval, bin|relative_position, sample).

required
modality str

Sets ylabel and colour defaults: "coverage", "methylation", "variant".

None
samples list of str

Subset of samples to plot (ignored when groups is set).

None
groups dict {label: [samples]}

Group samples for averaging with inter-sample error bands.

None
flip_minus_strand bool

Reverse minus-strand intervals so all profiles run 5'→3'.

True
error_stat ('sem', 'std', None)

Shaded confidence band.

"sem"
reference_point float or None

X position of the vertical reference line. None omits it.

0
reference_label str

Legend label for the reference line.

"TSS"
xlabel str

Axis labels and figure title.

'Relative position'
ylabel str

Axis labels and figure title.

'Relative position'
title str

Axis labels and figure title.

'Relative position'
ax matplotlib Axes

Axes to draw on; creates a new figure if None.

None
filepath str or Path

Save figure to this path if provided.

None

Returns:

Name Type Description
ax Axes
Source code in quantnado/api.py
def metaplot(
    self,
    data: xr.DataArray,
    data_rev: xr.DataArray | None = None,
    *,
    modality: str | None = None,
    samples: list[str] | None = None,
    groups: dict[str, list[str]] | None = None,
    flip_minus_strand: bool = True,
    error_stat: str | None = "sem",
    palette: str | list | dict | None = None,
    reference_point: float | None = 0,
    reference_label: str = "TSS",
    xlabel: str = "Relative position",
    ylabel: str | None = None,
    title: str = "Metagene profile",
    figsize: tuple[float, float] = (8, 4),
    ax: Any = None,
    filepath: str | Path | None = None,
) -> Any:
    """
    Plot a metagene profile from the output of ``.extract()``.

    Parameters
    ----------
    data : DataArray
        Output of ``.extract()`` with dims (interval, bin|relative_position, sample).
    modality : str, optional
        Sets ylabel and colour defaults: "coverage", "methylation", "variant".
    samples : list of str, optional
        Subset of samples to plot (ignored when ``groups`` is set).
    groups : dict {label: [samples]}, optional
        Group samples for averaging with inter-sample error bands.
    flip_minus_strand : bool, default True
        Reverse minus-strand intervals so all profiles run 5'→3'.
    error_stat : {"sem", "std", None}, default "sem"
        Shaded confidence band.
    reference_point : float or None, default 0
        X position of the vertical reference line. None omits it.
    reference_label : str, default "TSS"
        Legend label for the reference line.
    xlabel, ylabel, title : str
        Axis labels and figure title.
    ax : matplotlib Axes, optional
        Axes to draw on; creates a new figure if None.
    filepath : str or Path, optional
        Save figure to this path if provided.

    Returns
    -------
    ax : matplotlib.axes.Axes
    """
    return metaplot(
        data,
        data_rev,
        modality=modality,
        samples=samples,
        groups=groups,
        flip_minus_strand=flip_minus_strand,
        error_stat=error_stat,
        palette=palette,
        reference_point=reference_point,
        reference_label=reference_label,
        xlabel=xlabel,
        ylabel=ylabel,
        title=title,
        figsize=figsize,
        ax=ax,
        filepath=filepath,
    )

tornadoplot

tornadoplot(
    data: DataArray,
    data_rev: DataArray | None = None,
    *,
    modality: str | None = None,
    samples: list[str] | None = None,
    sample_names: list[str] | None = None,
    groups: dict[str, list[str]] | None = None,
    flip_minus_strand: bool = True,
    sort_by: str | None = "mean",
    vmin: float | None = None,
    vmax: float | None = None,
    scale_each: bool = False,
    cmap: str | None = None,
    reference_point: float | None = 0,
    reference_label: str = "TSS",
    xlabel: str = "Relative position",
    ylabel: str | None = None,
    title: str = "Signal heatmap",
    figsize: tuple[float, float] | None = None,
    filepath: str | Path | None = None,
) -> list

Tornado / heatmap plot from the output of .extract().

Parameters:

Name Type Description Default
data DataArray

Output of .extract().

required
data_rev DataArray

Reverse-strand data. When provided each panel shows fwd − rev with a divergent (RdBu_r) colormap centred at zero.

None
modality str

Sets colour defaults: "coverage", "methylation", "variant".

None
samples list of str

Subset of samples (one panel each). Ignored when groups is set.

None
sample_names list of str

Display names for samples.

None
groups dict {label: [samples]}

Average samples within each group (one panel per group).

None
flip_minus_strand bool

Reverse minus-strand intervals before plotting.

True
sort_by ('mean', 'mean_r', 'max', None)

Sort intervals by signal. "mean_r" sorts ascending (lowest at top).

"mean"
vmin float

Colour scale limits.

None
vmax float

Colour scale limits.

None
scale_each bool

Independent per-panel colour scale with a horizontal colourbar beneath each panel.

False
cmap str

Matplotlib colormap.

None
reference_point float or None

X position of the vertical reference line. None omits it.

0
reference_label str

Label for the reference line.

"TSS"
xlabel str
"Relative position"
ylabel str
None
title str
"Signal heatmap"
figsize tuple
None
filepath str or Path
None

Returns:

Name Type Description
axes list of matplotlib.axes.Axes
Source code in quantnado/api.py
def tornadoplot(
    self,
    data: xr.DataArray,
    data_rev: xr.DataArray | None = None,
    *,
    modality: str | None = None,
    samples: list[str] | None = None,
    sample_names: list[str] | None = None,
    groups: dict[str, list[str]] | None = None,
    flip_minus_strand: bool = True,
    sort_by: str | None = "mean",
    vmin: float | None = None,
    vmax: float | None = None,
    scale_each: bool = False,
    cmap: str | None = None,
    reference_point: float | None = 0,
    reference_label: str = "TSS",
    xlabel: str = "Relative position",
    ylabel: str | None = None,
    title: str = "Signal heatmap",
    figsize: tuple[float, float] | None = None,
    filepath: str | Path | None = None,
) -> list:
    """
    Tornado / heatmap plot from the output of ``.extract()``.

    Parameters
    ----------
    data : DataArray
        Output of ``.extract()``.
    data_rev : DataArray, optional
        Reverse-strand data. When provided each panel shows ``fwd − rev``
        with a divergent (RdBu_r) colormap centred at zero.
    modality : str, optional
        Sets colour defaults: "coverage", "methylation", "variant".
    samples : list of str, optional
        Subset of samples (one panel each). Ignored when ``groups`` is set.
    sample_names : list of str, optional
        Display names for samples.
    groups : dict {label: [samples]}, optional
        Average samples within each group (one panel per group).
    flip_minus_strand : bool, default True
        Reverse minus-strand intervals before plotting.
    sort_by : {"mean", "mean_r", "max", None}, default "mean"
        Sort intervals by signal. ``"mean_r"`` sorts ascending (lowest at top).
    vmin, vmax : float, optional
        Colour scale limits.
    scale_each : bool, default False
        Independent per-panel colour scale with a horizontal colourbar beneath each panel.
    cmap : str, optional
        Matplotlib colormap.
    reference_point : float or None, default 0
        X position of the vertical reference line. None omits it.
    reference_label : str, default "TSS"
        Label for the reference line.
    xlabel : str, default "Relative position"
    ylabel : str, optional
    title : str, default "Signal heatmap"
    figsize : tuple, optional
    filepath : str or Path, optional

    Returns
    -------
    axes : list of matplotlib.axes.Axes
    """
    return tornadoplot(
        data,
        data_rev,
        modality=modality,
        samples=samples,
        sample_names=sample_names,
        groups=groups,
        flip_minus_strand=flip_minus_strand,
        sort_by=sort_by,
        vmin=vmin,
        vmax=vmax,
        scale_each=scale_each,
        cmap=cmap,
        reference_point=reference_point,
        reference_label=reference_label,
        xlabel=xlabel,
        ylabel=ylabel,
        title=title,
        figsize=figsize,
        filepath=filepath,
    )

locus_plot

locus_plot(
    locus: str,
    *,
    sample_names: list[str],
    modality: list[str],
    allele_depth_ref: DataArray | None = None,
    allele_depth_alt: DataArray | None = None,
    genotype: DataArray | None = None,
    coverage: DataArray | None = None,
    coverage_fwd: DataArray | None = None,
    coverage_rev: DataArray | None = None,
    methylation: DataArray | None = None,
    palette: str | list | dict | None = None,
    title: str = "Locus plot",
    figsize: tuple[float, float] = (12, 6),
    filepath: str | Path | None = None,
) -> list

Multi-omics genome-browser-style locus plot.

Draws one horizontal track per entry in sample_names, stacked vertically on a shared genomic x-axis. Coverage and methylation data are fetched automatically from the store if not explicitly provided.

Parameters:

Name Type Description Default
locus str

Genomic region, e.g. "chr21:5200000-5260000".

required
sample_names list of str

Sample name for each track (must exist in the relevant sub-store).

required
modality list of str

Modality per track — "coverage", "methylation", or "variant". Must be the same length as sample_names.

required
allele_depth_ref DataArray

Pre-computed reference allele depth from var.extract_region(locus, variable="allele_depth_ref"). Required for any "variant" track.

None
allele_depth_alt DataArray

Pre-computed alternate allele depth from var.extract_region(locus, variable="allele_depth_alt"). Required for any "variant" track.

None
genotype DataArray

Pre-computed genotype array from var.extract_region(locus, variable="genotype"). Optional; used to style variant lollipops by true genotype (blue = het, red = hom-alt). If omitted, genotype is approximated from allele frequency.

None
coverage DataArray

Pre-computed coverage region array. If None and coverage tracks are requested, fetched automatically from the coverage store.

None
methylation DataArray

Pre-computed methylation region array. If None and methylation tracks are requested, fetched automatically from the methylation store.

None
palette str, list, or dict

Colour palette. A dict maps sample names to colours.

None
title str

Figure title.

"Locus plot"
figsize tuple

Figure size (width, height) in inches.

(12, 6)
filepath str or Path

Save figure to this path if provided.

None

Returns:

Name Type Description
axes list of matplotlib.axes.Axes

Examples:

>>> adr = ds.variants.extract_region(locus, variable="allele_depth_ref").compute()
>>> ada = ds.variants.extract_region(locus, variable="allele_depth_alt").compute()
>>> ds.locus_plot(
...     locus="chr21:5200000-5260000",
...     sample_names=["atac", "chip", "meth-rep1", "snp"],
...     modality=["coverage", "coverage", "methylation", "variant"],
...     allele_depth_ref=adr,
...     allele_depth_alt=ada,
...     title="Multi-omics locus",
... )
Source code in quantnado/api.py
def locus_plot(
    self,
    locus: str,
    *,
    sample_names: list[str],
    modality: list[str],
    allele_depth_ref: xr.DataArray | None = None,
    allele_depth_alt: xr.DataArray | None = None,
    genotype: xr.DataArray | None = None,
    coverage: xr.DataArray | None = None,
    coverage_fwd: xr.DataArray | None = None,
    coverage_rev: xr.DataArray | None = None,
    methylation: xr.DataArray | None = None,
    palette: str | list | dict | None = None,
    title: str = "Locus plot",
    figsize: tuple[float, float] = (12, 6),
    filepath: str | Path | None = None,
) -> list:
    """
    Multi-omics genome-browser-style locus plot.

    Draws one horizontal track per entry in ``sample_names``, stacked
    vertically on a shared genomic x-axis. Coverage and methylation data
    are fetched automatically from the store if not explicitly provided.

    Parameters
    ----------
    locus : str
        Genomic region, e.g. ``"chr21:5200000-5260000"``.
    sample_names : list of str
        Sample name for each track (must exist in the relevant sub-store).
    modality : list of str
        Modality per track — ``"coverage"``, ``"methylation"``, or
        ``"variant"``. Must be the same length as ``sample_names``.
    allele_depth_ref : DataArray, optional
        Pre-computed reference allele depth from
        ``var.extract_region(locus, variable="allele_depth_ref")``.
        Required for any ``"variant"`` track.
    allele_depth_alt : DataArray, optional
        Pre-computed alternate allele depth from
        ``var.extract_region(locus, variable="allele_depth_alt")``.
        Required for any ``"variant"`` track.
    genotype : DataArray, optional
        Pre-computed genotype array from
        ``var.extract_region(locus, variable="genotype")``.
        Optional; used to style variant lollipops by true genotype
        (blue = het, red = hom-alt). If omitted, genotype is approximated
        from allele frequency.
    coverage : DataArray, optional
        Pre-computed coverage region array. If None and coverage tracks
        are requested, fetched automatically from the coverage store.
    methylation : DataArray, optional
        Pre-computed methylation region array. If None and methylation
        tracks are requested, fetched automatically from the methylation store.
    palette : str, list, or dict, optional
        Colour palette. A dict maps sample names to colours.
    title : str, default "Locus plot"
        Figure title.
    figsize : tuple, default (12, 6)
        Figure size ``(width, height)`` in inches.
    filepath : str or Path, optional
        Save figure to this path if provided.

    Returns
    -------
    axes : list of matplotlib.axes.Axes

    Examples
    --------
    >>> adr = ds.variants.extract_region(locus, variable="allele_depth_ref").compute()
    >>> ada = ds.variants.extract_region(locus, variable="allele_depth_alt").compute()
    >>> ds.locus_plot(
    ...     locus="chr21:5200000-5260000",
    ...     sample_names=["atac", "chip", "meth-rep1", "snp"],
    ...     modality=["coverage", "coverage", "methylation", "variant"],
    ...     allele_depth_ref=adr,
    ...     allele_depth_alt=ada,
    ...     title="Multi-omics locus",
    ... )
    """
    # Auto-fetch coverage if needed and not already provided
    if coverage is None and any(m == "coverage" for m in modality):
        coverage = self._require_coverage().extract_region(locus)

    # Auto-fetch stranded coverage if needed and not already provided
    if any(m == "stranded_coverage" for m in modality):
        bam = self._require_coverage()
        if coverage_fwd is None:
            coverage_fwd = bam.extract_region(locus, strand="+")
        if coverage_rev is None:
            coverage_rev = bam.extract_region(locus, strand="-")

    # Auto-fetch methylation if needed and not already provided
    if methylation is None and any(m == "methylation" for m in modality):
        if self.methylation is None:
            raise RuntimeError("No methylation store available for auto-fetch.")
        methylation = self.methylation.extract_region(locus, variable="methylation_pct")

    return locus_plot(
        locus,
        sample_names=sample_names,
        modality=modality,
        coverage=coverage,
        coverage_fwd=coverage_fwd,
        coverage_rev=coverage_rev,
        methylation=methylation,
        allele_depth_ref=allele_depth_ref,
        allele_depth_alt=allele_depth_alt,
        genotype=genotype,
        palette=palette,
        title=title,
        figsize=figsize,
        filepath=filepath,
    )

heatmap

heatmap(
    data: "xr.Dataset | xr.DataArray | pd.DataFrame",
    *,
    variable: str | None = None,
    samples: list[str] | None = None,
    log_transform: bool = True,
    cmap: str = "mako",
    figsize: tuple[float, float] = (6, 6),
    title: str = "Signal heatmap",
    filepath: str | Path | None = None,
) -> Any

Clustered heatmap of genomic signal across samples.

Parameters:

Name Type Description Default
data Dataset | DataArray | DataFrame

Output of reduce() (xr.Dataset), a single DataArray variable, or count_features() (pd.DataFrame).

required
variable str

Data variable to plot when data is an xr.Dataset (e.g. "mean", "sum"). Defaults to "mean" if present.

None
samples list of str

Subset of samples to include. Defaults to all samples.

None
log_transform bool

Apply log1p before plotting.

True
cmap str

Colormap name.

"mako"
figsize tuple

Figure size in inches.

(6, 6)
title str

Figure title.

'Signal heatmap'
filepath str or Path

Save figure to this path if provided.

None

Returns:

Name Type Description
g ClusterGrid

Examples:

>>> signal = ds.reduce(intervals_path="promoters.bed", reduction="mean")
>>> g = ds.heatmap(signal, title="Promoter signal")
Source code in quantnado/api.py
def heatmap(
    self,
    data: "xr.Dataset | xr.DataArray | pd.DataFrame",
    *,
    variable: str | None = None,
    samples: list[str] | None = None,
    log_transform: bool = True,
    cmap: str = "mako",
    figsize: tuple[float, float] = (6, 6),
    title: str = "Signal heatmap",
    filepath: str | Path | None = None,
) -> Any:
    """
    Clustered heatmap of genomic signal across samples.

    Parameters
    ----------
    data : xr.Dataset | xr.DataArray | pd.DataFrame
        Output of ``reduce()`` (xr.Dataset), a single DataArray variable,
        or ``count_features()`` (pd.DataFrame).
    variable : str, optional
        Data variable to plot when ``data`` is an ``xr.Dataset``
        (e.g. ``"mean"``, ``"sum"``). Defaults to ``"mean"`` if present.
    samples : list of str, optional
        Subset of samples to include. Defaults to all samples.
    log_transform : bool, default True
        Apply ``log1p`` before plotting.
    cmap : str, default "mako"
        Colormap name.
    figsize : tuple, default (6, 6)
        Figure size in inches.
    title : str
        Figure title.
    filepath : str or Path, optional
        Save figure to this path if provided.

    Returns
    -------
    g : seaborn.matrix.ClusterGrid

    Examples
    --------
    >>> signal = ds.reduce(intervals_path="promoters.bed", reduction="mean")
    >>> g = ds.heatmap(signal, title="Promoter signal")
    """
    return heatmap(
        data,
        variable=variable,
        samples=samples,
        log_transform=log_transform,
        cmap=cmap,
        figsize=figsize,
        title=title,
        filepath=filepath,
    )

correlate

correlate(
    data: "xr.Dataset | xr.DataArray | pd.DataFrame",
    *,
    variable: str | None = None,
    samples: list[str] | None = None,
    method: str = "pearson",
    log_transform: bool = True,
    annotate: bool = True,
    cmap: str = "RdBu_r",
    figsize: tuple[float, float] = (6, 6),
    title: str = "Sample–sample correlation",
    filepath: str | Path | None = None,
) -> "tuple[pd.DataFrame, Any]"

Compute and plot a sample–sample correlation matrix.

Parameters:

Name Type Description Default
data Dataset | DataArray | DataFrame

Output of reduce() or count_features().

required
variable str

Data variable to use when data is an xr.Dataset.

None
samples list of str

Subset of samples. Defaults to all samples.

None
method ('pearson', 'spearman')

Correlation method.

"pearson"
log_transform bool

Apply log1p before computing correlations.

True
annotate bool

Annotate cells with the r value.

True
cmap str

Colormap for the heatmap cells.

"RdBu_r"
figsize tuple

Figure size in inches.

(6, 6)
title str

Figure title.

'Sample–sample correlation'
filepath str or Path

Save figure to this path if provided.

None

Returns:

Name Type Description
corr_df DataFrame

Correlation matrix (samples × samples).

g ClusterGrid

Examples:

>>> signal = ds.reduce(intervals_path="promoters.bed", reduction="mean")
>>> corr_df, g = ds.correlate(signal, title="Promoter signal correlation")
Source code in quantnado/api.py
def correlate(
    self,
    data: "xr.Dataset | xr.DataArray | pd.DataFrame",
    *,
    variable: str | None = None,
    samples: list[str] | None = None,
    method: str = "pearson",
    log_transform: bool = True,
    annotate: bool = True,
    cmap: str = "RdBu_r",
    figsize: tuple[float, float] = (6, 6),
    title: str = "Sample–sample correlation",
    filepath: str | Path | None = None,
) -> "tuple[pd.DataFrame, Any]":
    """
    Compute and plot a sample–sample correlation matrix.

    Parameters
    ----------
    data : xr.Dataset | xr.DataArray | pd.DataFrame
        Output of ``reduce()`` or ``count_features()``.
    variable : str, optional
        Data variable to use when ``data`` is an ``xr.Dataset``.
    samples : list of str, optional
        Subset of samples. Defaults to all samples.
    method : {"pearson", "spearman"}, default "pearson"
        Correlation method.
    log_transform : bool, default True
        Apply ``log1p`` before computing correlations.
    annotate : bool, default True
        Annotate cells with the r value.
    cmap : str, default "RdBu_r"
        Colormap for the heatmap cells.
    figsize : tuple, default (6, 6)
        Figure size in inches.
    title : str
        Figure title.
    filepath : str or Path, optional
        Save figure to this path if provided.

    Returns
    -------
    corr_df : pd.DataFrame
        Correlation matrix (samples × samples).
    g : seaborn.matrix.ClusterGrid

    Examples
    --------
    >>> signal = ds.reduce(intervals_path="promoters.bed", reduction="mean")
    >>> corr_df, g = ds.correlate(signal, title="Promoter signal correlation")
    """
    return correlate(
        data,
        variable=variable,
        samples=samples,
        method=method,
        log_transform=log_transform,
        annotate=annotate,
        cmap=cmap,
        figsize=figsize,
        title=title,
        filepath=filepath,
    )

set_metadata

set_metadata(
    metadata: DataFrame,
    sample_column: str = "sample_id",
    merge: bool = True,
) -> None

Attach metadata to the coverage store.

Parameters:

Name Type Description Default
metadata DataFrame
required
sample_column str
"sample_id"
merge bool

If True, merge with existing metadata. If False, replace entirely.

True
Source code in quantnado/api.py
def set_metadata(
    self,
    metadata: pd.DataFrame,
    sample_column: str = "sample_id",
    merge: bool = True,
) -> None:
    """
    Attach metadata to the coverage store.

    Parameters
    ----------
    metadata : DataFrame
    sample_column : str, default "sample_id"
    merge : bool, default True
        If True, merge with existing metadata. If False, replace entirely.
    """
    self._require_coverage().set_metadata(metadata, sample_column=sample_column, merge=merge)

update_metadata

update_metadata(updates: dict) -> None

Update metadata columns using a dictionary.

Parameters:

Name Type Description Default
updates dict

Column name → list (aligned with sample order) or dict {sample: value}.

required
Source code in quantnado/api.py
def update_metadata(self, updates: dict) -> None:
    """
    Update metadata columns using a dictionary.

    Parameters
    ----------
    updates : dict
        Column name → list (aligned with sample order) or dict {sample: value}.
    """
    self._require_coverage().update_metadata(updates)

list_metadata_columns

list_metadata_columns() -> list[str]

List current metadata column names in the coverage store.

Source code in quantnado/api.py
def list_metadata_columns(self) -> list[str]:
    """List current metadata column names in the coverage store."""
    return self._require_coverage().list_metadata_columns()

remove_metadata_columns

remove_metadata_columns(columns: list[str]) -> None

Remove metadata columns from the coverage store.

Source code in quantnado/api.py
def remove_metadata_columns(self, columns: list[str]) -> None:
    """Remove metadata columns from the coverage store."""
    self._require_coverage().remove_metadata_columns(columns)

metadata_to_csv

metadata_to_csv(path: str | Path) -> None

Export coverage store metadata to CSV.

Source code in quantnado/api.py
def metadata_to_csv(self, path: str | Path) -> None:
    """Export coverage store metadata to CSV."""
    self._require_coverage().metadata_to_csv(path)

metadata_to_json

metadata_to_json(path: str | Path) -> None

Export coverage store metadata to JSON.

Source code in quantnado/api.py
def metadata_to_json(self, path: str | Path) -> None:
    """Export coverage store metadata to JSON."""
    self._require_coverage().metadata_to_json(path)

metadata_from_csv staticmethod

metadata_from_csv(
    path: str | Path, **kwargs
) -> pd.DataFrame

Load a metadata DataFrame from CSV.

Source code in quantnado/api.py
@staticmethod
def metadata_from_csv(path: str | Path, **kwargs) -> pd.DataFrame:
    """Load a metadata DataFrame from CSV."""
    return BamStore.metadata_from_csv(path, **kwargs)

metadata_from_json staticmethod

metadata_from_json(path: str | Path) -> pd.DataFrame

Load a metadata DataFrame from JSON.

Source code in quantnado/api.py
@staticmethod
def metadata_from_json(path: str | Path) -> pd.DataFrame:
    """Load a metadata DataFrame from JSON."""
    return BamStore.metadata_from_json(path)