From d9c3a3a908b3c432baf9f4f31bae07ab08957f37 Mon Sep 17 00:00:00 2001 From: akmazian Date: Tue, 4 Jun 2024 16:09:15 -0700 Subject: [PATCH 1/2] typed all files except shell.py --- pyensembl/common.pyi | 20 ++ pyensembl/database.pyi | 131 ++++++++++++ pyensembl/download_cache.pyi | 78 +++++++ pyensembl/ensembl_release.pyi | 51 +++++ pyensembl/ensembl_url_templates.pyi | 82 ++++++++ pyensembl/ensembl_versions.pyi | 19 ++ pyensembl/exon.pyi | 32 +++ pyensembl/fasta.pyi | 35 ++++ pyensembl/gene.pyi | 47 +++++ pyensembl/genome.pyi | 301 ++++++++++++++++++++++++++++ pyensembl/locus.pyi | 63 ++++++ pyensembl/locus_with_genome.pyi | 31 +++ pyensembl/normalization.pyi | 18 ++ pyensembl/reference_name.pyi | 29 +++ pyensembl/search.pyi | 31 +++ pyensembl/sequence_data.pyi | 38 ++++ pyensembl/species.pyi | 237 ++++++++++++++++++++++ pyensembl/transcript.pyi | 110 ++++++++++ pyensembl/version.pyi | 6 + 19 files changed, 1359 insertions(+) create mode 100644 pyensembl/common.pyi create mode 100644 pyensembl/database.pyi create mode 100644 pyensembl/download_cache.pyi create mode 100644 pyensembl/ensembl_release.pyi create mode 100644 pyensembl/ensembl_url_templates.pyi create mode 100644 pyensembl/ensembl_versions.pyi create mode 100644 pyensembl/exon.pyi create mode 100644 pyensembl/fasta.pyi create mode 100644 pyensembl/gene.pyi create mode 100644 pyensembl/genome.pyi create mode 100644 pyensembl/locus.pyi create mode 100644 pyensembl/locus_with_genome.pyi create mode 100644 pyensembl/normalization.pyi create mode 100644 pyensembl/reference_name.pyi create mode 100644 pyensembl/search.pyi create mode 100644 pyensembl/sequence_data.pyi create mode 100644 pyensembl/species.pyi create mode 100644 pyensembl/transcript.pyi create mode 100644 pyensembl/version.pyi diff --git a/pyensembl/common.pyi b/pyensembl/common.pyi new file mode 100644 index 0000000..caa2b83 --- /dev/null +++ b/pyensembl/common.pyi @@ -0,0 +1,20 @@ +# Licensed under the Apache License, Version 2.0 (the 
"License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Any, Callable, Dict, List, Tuple, Union + +def dump_pickle(obj: object, filepath: str) -> None: ... +def load_pickle(filepath: str) -> object: ... +def _memoize_cache_key( + args: Union[List, Tuple], kwargs: Dict[str, Union[List, Tuple]] +): ... +def memoize(fn: Callable[..., Any]) -> Callable[..., Any]: ... diff --git a/pyensembl/database.pyi b/pyensembl/database.pyi new file mode 100644 index 0000000..cd39904 --- /dev/null +++ b/pyensembl/database.pyi @@ -0,0 +1,131 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import TYPE_CHECKING, Any, List, Literal, Optional, Union +from .common import memoize + +if TYPE_CHECKING: + import logging + import polars + from .locus import Locus + from sqlite3 import Connection + +# any time we update the database schema, increment this version number +DATABASE_SCHEMA_VERSION = 3 + +logger: logging.Logger = ... 
+ +class Database(object): + def __init__( + self, + gtf_path: str, + install_string: Optional[str] = None, + cache_directory_path: Optional[str] = None, + restrict_gtf_columns: Optional[List[str]] = None, + restrict_gtf_features: Optional[List[str]] = None, + ) -> None: ... + def __eq__(self, other) -> bool: ... + def __str__(self) -> str: ... + def __hash__(self) -> int: ... + @property + def local_db_filename(self) -> str: ... + @property + def local_db_path(self) -> str: ... + def _all_possible_indices(self, column_names: str) -> List[List[str]]: ... + + PRIMARY_KEY_COLUMNS = {"gene": "gene_id", "transcript": "transcript_id"} + + def _get_primary_key( + self, feature_name: str, feature_df: polars.DataFrame + ) -> str: ... + def _feature_indices( + self, all_index_groups: List, primary_key: str, feature_df: polars.DataFrame + ) -> List: ... + def create(self, overwrite: bool = False) -> Connection: ... + def _get_connection(self) -> Connection: ... + @property + def connection(self) -> Connection: ... + def connect_or_create(self, overwrite: bool = False) -> Connection: ... + def columns(self, table_name: str) -> List[str]: ... + def column_exists(self, table_name: str, column_name: str) -> bool: ... + def column_values_at_locus( + self, + column_name: str, + feature: str, + contig: str, + position: int, + end: Optional[int] = None, + strand: Optional[Literal["+", "-"]] = None, + distinct: bool = False, + sorted: bool = False, + ) -> List[Any]: ... + def distinct_column_values_at_locus( + self, + column: str, + feature: str, + contig: str, + position: int, + end: Optional[int] = None, + strand: Optional[Literal["+", "-"]] = None, + ) -> List[Any]: ... + def run_sql_query( + self, sql: str, required: bool = False, query_params: List[Union[str, int]] = [] + ) -> List[Any]: ... 
+ @memoize + def query( + self, + select_column_names: List[str], + filter_column: str, + filter_value: str, + feature: Literal["transcript", "gene", "exon", "CDS"], + distinct: bool = False, + required: bool = False, + ) -> List[Any]: ... + def query_one( + self, + select_column_names: List[str], + filter_column: str, + filter_value: str, + feature: Literal["transcript", "gene", "exon", "CDS"], + distinct: bool = False, + required: bool = False, + ): ... + @memoize + def query_feature_values( + self, + column: str, + feature: Literal["transcript", "gene", "exon", "CDS"], + distinct: bool = True, + contig: Optional[str] = None, + strand: Optional[Literal["+", "-"]] = None, + ) -> List[str]: ... + def query_distinct_on_contig( + self, + column_name: str, + feature: Literal["transcript", "gene", "exon", "CDS"], + contig: str, + ) -> List[str]: ... + def query_loci( + self, + filter_column: str, + filter_value: str, + feature: Literal["transcript", "gene", "exon", "CDS"], + ) -> List[Locus]: ... + def query_locus( + self, + filter_column: str, + filter_value: str, + feature: Literal["transcript", "gene", "exon", "CDS"], + ) -> Locus: ... + def _load_gtf_as_dataframe( + self, usecols: Optional[List[str]] = None, features: Optional[List[str]] = None + ) -> polars.DataFrame: ... diff --git a/pyensembl/download_cache.pyi b/pyensembl/download_cache.pyi new file mode 100644 index 0000000..d31e073 --- /dev/null +++ b/pyensembl/download_cache.pyi @@ -0,0 +1,78 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import TYPE_CHECKING, Any, Callable, Dict, List, Optional, Tuple, Union + +if TYPE_CHECKING: + import logging + +logger: logging.Logger = ... + +CACHE_BASE_SUBDIR = "pyensembl" +CACHE_DIR_ENV_KEY = "PYENSEMBL_CACHE_DIR" + +def cache_subdirectory( + reference_name: Optional[str] = None, + annotation_name: Optional[str] = None, + annotation_version: Optional[Union[str, int]] = None, +) -> str: ... + +class MissingRemoteFile(Exception): + def __init__(self, url: str) -> None: ... + +class MissingLocalFile(Exception): + def __init__(self, path: str) -> None: ... + def __str__(self) -> str: ... + +class DownloadCache(object): + def __init__( + self, + reference_name: str, + annotation_name: str, + annotation_version: Optional[Union[str, int]] = None, + decompress_on_download: bool = False, + copy_local_files_to_cache: bool = False, + install_string_function: Optional[Callable[..., Any]] = None, + cache_directory_path: Optional[str] = None, + ) -> None: ... + @property + def cache_directory_path(self) -> str: ... + def _fields(self) -> Tuple[Tuple[str, Union[str, int, bool]], ...]: ... + def __eq__(self, other) -> bool: ... + def __hash__(self) -> int: ... + def __str__(self) -> str: ... + def __repr__(self) -> str: ... + def is_url_format(self, path_or_url: str) -> bool: ... + def _remove_compression_suffix_if_present(self, filename: str) -> str: ... + def cached_path(self, path_or_url: str) -> str: ... + def _download_if_necessary( + self, url: str, download_if_missing: bool, overwrite: bool + ) -> str: ... + def _copy_if_necessary(self, local_path: str, overwrite: bool) -> str: ... + def download_or_copy_if_necessary( + self, + path_or_url: str, + download_if_missing: bool = False, + overwrite: bool = False, + ) -> str: ... + def _raise_missing_file_error(self, missing_urls_dict: Dict) -> None: ... 
+ def local_path_or_install_error( + self, + field_name: str, + path_or_url: str, + download_if_missing: bool = False, + overwrite: bool = False, + ) -> str: ... + def delete_cached_files( + self, prefixes: List[str] = [], suffixes: List[str] = [] + ) -> None: ... + def delete_cache_directory(self) -> None: ... diff --git a/pyensembl/ensembl_release.pyi b/pyensembl/ensembl_release.pyi new file mode 100644 index 0000000..6854abf --- /dev/null +++ b/pyensembl/ensembl_release.pyi @@ -0,0 +1,51 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Union, TYPE_CHECKING +from typing_extensions import deprecated + +from .genome import Genome +from .ensembl_versions import MAX_ENSEMBL_RELEASE +from .species import human +from .ensembl_url_templates import ENSEMBL_FTP_SERVER + +if TYPE_CHECKING: + from .species import Species + +class EnsemblRelease(Genome): + @classmethod + def normalize_init_values( + cls, release: Union[int, str], species: Union[Species, str], server: str + ): ... + @classmethod + def cached( + cls, + release: int = MAX_ENSEMBL_RELEASE, + species: Union[str, Species] = human, + server: str = ENSEMBL_FTP_SERVER, + ): ... + def __init__( + self, + release: int = MAX_ENSEMBL_RELEASE, + species: Union[str, Species] = human, + server: str = ENSEMBL_FTP_SERVER, + ): ... + def install_string(self) -> str: ... + def __str__(self) -> str: ... + def __eq__(self, other) -> bool: ... + def __hash__(self) -> int: ... + def to_dict(self) -> dict: ... 
+ @classmethod + def from_dict(cls, state_dict: dict) -> "EnsemblRelease": ... + +@deprecated("Use pyensembl.ensembl_release.EnsemblRelease.cached instead.") +def cached_release(release, species="human") -> EnsemblRelease: ... diff --git a/pyensembl/ensembl_url_templates.pyi b/pyensembl/ensembl_url_templates.pyi new file mode 100644 index 0000000..2b6fa9d --- /dev/null +++ b/pyensembl/ensembl_url_templates.pyi @@ -0,0 +1,82 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +""" +Templates for URLs and paths to specific release, species, and file type +on the Ensembl ftp server. 
+ +For example, the human chromosomal DNA sequences for release 78 are in: + + https://ftp.ensembl.org/pub/release-78/fasta/homo_sapiens/dna/ + +""" + +from typing import Literal, Tuple, Union, TYPE_CHECKING +if TYPE_CHECKING: + from .species import Species + +ENSEMBL_FTP_SERVER: str = "https://ftp.ensembl.org" +ENSEMBL_PLANTS_FTP_SERVER: str = "https://ftp.ensemblgenomes.ebi.ac.uk/" + +FASTA_SUBDIR_TEMPLATE: str = "/pub/release-%(release)d/fasta/%(species)s/%(type)s/" +PLANTS_FASTA_SUBDIR_TEMPLATE: str = ( + "/pub/release-%(release)d/plants/fasta/%(species)s/%(type)s/" +) +GTF_SUBDIR_TEMPLATE: str = "/pub/release-%(release)d/gtf/%(species)s/" +PLANTS_GTF_SUBDIR_TEMPLATE: str = "/pub/release-%(release)d/plants/gtf/%(species)s/" + +lPlants: Tuple[str, ...] = ("arabidopsis_thaliana", "arabidopsis") + +def normalize_release_properties( + ensembl_release: Union[str, int], species: Union[str, Species] +) -> Tuple[int, str, str]: ... + +GTF_FILENAME_TEMPLATE: str = "%(Species)s.%(reference)s.%(release)d.gtf.gz" + +def make_gtf_filename( + ensembl_release: Union[str, int], species: Union[str, Species] +) -> str: ... +def make_gtf_url( + ensembl_release: Union[str, int], + species: Union[str, Species], + server: str = ENSEMBL_FTP_SERVER, + gtf_subdir: str = GTF_SUBDIR_TEMPLATE, +) -> str: ... + +OLD_FASTA_FILENAME_TEMPLATE: str = ( + "%(Species)s.%(reference)s.%(release)d.%(sequence_type)s.all.fa.gz" +) + +OLD_FASTA_FILENAME_TEMPLATE_NCRNA: str = ( + "%(Species)s.%(reference)s.%(release)d.ncrna.fa.gz" +) + +NEW_FASTA_FILENAME_TEMPLATE: str = ( + "%(Species)s.%(reference)s.%(sequence_type)s.all.fa.gz" +) + +NEW_FASTA_FILENAME_TEMPLATE_NCRNA: str = "%(Species)s.%(reference)s.ncrna.fa.gz" + +def make_fasta_filename( + ensembl_release: Union[str, int], + species: Union[str, Species], + sequence_type: Literal["ncrna", "cdna", "cds", "pep", "dna", "dna_index"], + is_plant: bool, +) -> str: ... 
+def make_fasta_url( + ensembl_release: Union[str, int], + species: Union[str, Species], + sequence_type: Literal["ncrna", "cdna", "cds", "pep", "dna", "dna_index"], + is_plant: bool, + server: str = ENSEMBL_FTP_SERVER, + fasta_subdir: str = FASTA_SUBDIR_TEMPLATE, +) -> str: ... diff --git a/pyensembl/ensembl_versions.pyi b/pyensembl/ensembl_versions.pyi new file mode 100644 index 0000000..f478c7a --- /dev/null +++ b/pyensembl/ensembl_versions.pyi @@ -0,0 +1,19 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Union + +MIN_ENSEMBL_RELEASE: int = 40 +MAX_ENSEMBL_RELEASE: int = 111 +MAX_PLANTS_ENSEMBL_RELEASE: int = 58 + +def check_release_number(release: Union[str, int], squeeze: bool = False) -> int: ... diff --git a/pyensembl/exon.pyi b/pyensembl/exon.pyi new file mode 100644 index 0000000..f618a52 --- /dev/null +++ b/pyensembl/exon.pyi @@ -0,0 +1,32 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from typing import Dict, Literal, Union +from .locus import Locus + +class Exon(Locus): + def __init__( + self, + exon_id: str, + contig: str, + start: int, + end: int, + strand: Literal["+", "-"], + gene_name: str, + gene_id: str, + ) -> None: ... + @property + def id(self) -> str: ... + def __str__(self) -> str: ... + def __eq__(self, other) -> bool: ... + def __hash__(self) -> int: ... + def to_dict(self) -> Dict[str, Union[str, int]]: ... diff --git a/pyensembl/fasta.pyi b/pyensembl/fasta.pyi new file mode 100644 index 0000000..c561d1d --- /dev/null +++ b/pyensembl/fasta.pyi @@ -0,0 +1,35 @@ +# Copyright (c) 2015-2016. Mount Sinai School of Medicine +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import TYPE_CHECKING, Dict, Generator, Tuple, Union + +if TYPE_CHECKING: + import logging + from io import BufferedIOBase + +logger: logging.Logger = ... + +def _parse_header_id(line: bytes) -> str: ... + +class FastaParser(object): + def __init__(self) -> None: ... + def read_file(self, fasta_path: str) -> Dict[str, str]: ... + def iterate_over_file( + self, fasta_path: str + ) -> Generator[Tuple[str, str], None, None]: ... + def _open(self, fasta_path: str) -> Union[BufferedIOBase]: ... + def _current_entry(self) -> Tuple[str, str]: ... + def _read_header(self, line: bytes) -> Tuple[str, str]: ... + +def parse_fasta_dictionary(fasta_path: str) -> Dict[str, str]: ... 
diff --git a/pyensembl/gene.pyi b/pyensembl/gene.pyi new file mode 100644 index 0000000..88b1853 --- /dev/null +++ b/pyensembl/gene.pyi @@ -0,0 +1,47 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import TYPE_CHECKING, List, Literal, Union + +from memoized_property import memoized_property + +from .locus_with_genome import LocusWithGenome + +if TYPE_CHECKING: + from .exon import Exon + from .genome import Genome + from .transcript import Transcript + +class Gene(LocusWithGenome): + def __init__( + self, + gene_id: str, + gene_name: str, + contig: str, + start: int, + end: int, + strand: Literal["+", "-"], + biotype: str, + genome: "Genome", + ) -> None: ... + @property + def id(self) -> str: ... + @property + def name(self) -> str: ... + def __str__(self) -> str: ... + def __eq__(self, other) -> bool: ... + def __hash__(self) -> int: ... + def to_dict(self) -> dict[str, Union[str, int]]: ... + @memoized_property + def transcripts(self) -> List[Transcript]: ... + @memoized_property + def exons(self) -> list[Exon]: ... diff --git a/pyensembl/genome.pyi b/pyensembl/genome.pyi new file mode 100644 index 0000000..04845a9 --- /dev/null +++ b/pyensembl/genome.pyi @@ -0,0 +1,301 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import TYPE_CHECKING, List, Literal, Optional, Tuple, Union + +from serializable import Serializable + +if TYPE_CHECKING: + from .database import Database + from .exon import Exon + from .gene import Gene + from .locus import Locus + from .sequence_data import SequenceData + from .transcript import Transcript + +class Genome(Serializable): + """ + Bundles together the genomic annotation and sequence data associated with + a particular genomic database source (e.g. a single Ensembl release) and + provides a wide variety of helper methods for accessing this data. + """ + + def __init__( + self, + reference_name: str, + annotation_name: str, + annotation_version: Optional[Union[int, str]] = None, + gtf_path_or_url: Optional[str] = None, + transcript_fasta_paths_or_urls: Optional[list[str]] = None, + protein_fasta_paths_or_urls: Optional[list[str]] = None, + decompress_on_download: bool = False, + copy_local_files_to_cache: bool = False, + cache_directory_path: Optional[str] = None, + ) -> None: ... + @property + def requires_gtf(self) -> bool: ... + @property + def requires_transcript_fasta(self) -> bool: ... + @property + def requires_protein_fasta(self) -> bool: ... + def to_dict(self) -> dict: ... + def _init_lazy_fields(self) -> None: ... + def _get_cached_path( + self, field_name, path_or_url, download_if_missing=False, overwrite=False + ): ... + def _get_gtf_path(self, download_if_missing=False, overwrite=False): ... + def _get_transcript_fasta_paths( + self, download_if_missing=False, overwrite=False + ): ... 
+ def _get_protein_fasta_paths(self, download_if_missing=False, overwrite=False): ... + def _set_local_paths(self, download_if_missing=True, overwrite=False): ... + def required_local_files(self) -> List[str]: ... + def required_local_files_exist(self, empty_files_ok: bool = False) -> bool: ... + def download(self, overwrite: bool = False) -> None: ... + def index(self, overwrite: bool = False) -> None: ... + @property + def db(self) -> Database: ... + @property + def protein_sequences(self) -> SequenceData: ... + @property + def transcript_sequences(self) -> SequenceData: ... + def install_string(self) -> str: ... + def __str__(self) -> str: ... + def __repr__(self) -> str: ... + def _fields( + self, + ) -> Tuple[str, str, Union[int, str], str, Tuple[str, ...], Tuple[str, ...]]: ... + def __eq__(self, other) -> bool: ... + def __hash__(self) -> int: ... + def clear_cache(self) -> None: ... + def delete_index_files(self) -> None: ... + def _all_feature_values( + self, + column: str, + feature: str, + distinct: bool = True, + contig: Optional[str] = None, + strand: Optional[Literal["+", "-"]] = None, + ) -> List: ... + def transcript_sequence(self, transcript_id: str) -> Optional[str]: ... + def protein_sequence(self, protein_id: str) -> Optional[str]: ... + def genes_at_locus( + self, + contig: str, + position: int, + end: Optional[int] = None, + strand: Optional[Literal["+", "-"]] = None, + ) -> List[Gene]: ... + def transcripts_at_locus( + self, + contig: str, + position: int, + end: Optional[int] = None, + strand: Optional[Literal["+", "-"]] = None, + ) -> List[Transcript]: ... + def exons_at_locus( + self, + contig: str, + position: int, + end: Optional[int] = None, + strand: Optional[Literal["+", "-"]] = None, + ) -> List[Exon]: ... + def gene_ids_at_locus( + self, + contig: str, + position: int, + end: Optional[int] = None, + strand: Optional[Literal["+", "-"]] = None, + ) -> List[str]: ... 
+ def gene_names_at_locus( + self, + contig: str, + position: int, + end: Optional[int] = None, + strand: Optional[Literal["+", "-"]] = None, + ) -> List[str]: ... + def exon_ids_at_locus( + self, + contig: str, + position: int, + end: Optional[int] = None, + strand: Optional[Literal["+", "-"]] = None, + ) -> List[str]: ... + def transcript_ids_at_locus( + self, + contig: str, + position: int, + end: Optional[int] = None, + strand: Optional[Literal["+", "-"]] = None, + ) -> List[str]: ... + def transcript_names_at_locus( + self, + contig: str, + position: int, + end: Optional[int] = None, + strand: Optional[Literal["+", "-"]] = None, + ) -> List[str]: ... + def protein_ids_at_locus( + self, + contig: str, + position: int, + end: Optional[int] = None, + strand: Optional[Literal["+", "-"]] = None, + ) -> List[str]: ... + + ################################################### + # + # Methods which return Locus objects + # containing (contig, start, stop, strand) + # of various genomic entities + # + ################################################### + + def locus_of_gene_id(self, gene_id: str) -> Locus: ... + def loci_of_gene_names(self, gene_name: str) -> List[Locus]: ... + def locus_of_transcript_id(self, transcript_id: str) -> Locus: ... + def locus_of_exon_id(self, exon_id: str) -> Locus: ... + + ################################################### + # + # Contigs + # + ################################################### + + def contigs(self) -> List[str]: ... + + ################################################### + # + # Gene Info Objects + # + ################################################### + + def genes( + self, contig: Optional[str] = None, strand: Optional[Literal["+", "-"]] = None + ) -> List[Gene]: ... + def gene_by_id(self, gene_id: str) -> Gene: ... + def genes_by_name(self, gene_name: str) -> List[Gene]: ... + def gene_by_protein_id(self, protein_id: str) -> Gene: ... 
+ + ################################################### + # + # Gene Names + # + ################################################### + + def _query_gene_name( + self, + property_name: str, + property_value: str, + feature_type: Literal["gene", "transcript", "exon"], + ) -> str: ... + def gene_names( + self, contig: Optional[str] = None, strand: Optional[Literal["+", "-"]] = None + ) -> List[str]: ... + def gene_name_of_gene_id(self, gene_id: str) -> str: ... + def gene_name_of_transcript_id(self, transcript_id: str) -> str: ... + def gene_name_of_transcript_name(self, transcript_name: str) -> str: ... + def gene_name_of_exon_id(self, exon_id: str) -> str: ... + + ################################################### + # + # Gene IDs + # + ################################################### + + def _query_gene_ids( + self, + property_name: str, + value: str, + feature: Literal["gene", "CDS"] = "gene", + ) -> List[str]: ... + def gene_ids( + self, contig: Optional[str] = None, strand: Optional[Literal["+", "-"]] = None + ) -> List[str]: ... + def gene_ids_of_gene_name(self, gene_name: str) -> List[str]: ... + def gene_id_of_protein_id(self, protein_id: str) -> str: ... + + ################################################### + # + # Transcript Info Objects + # + ################################################### + + def transcripts( + self, contig: Optional[str] = None, strand: Optional[Literal["+", "-"]] = None + ) -> List[Transcript]: ... + def transcript_by_id(self, transcript_id: str) -> Transcript: ... + def transcripts_by_name(self, transcript_name: str) -> List[Transcript]: ... + def transcript_by_protein_id(self, protein_id: str) -> Transcript: ... + + ################################################### + # + # Transcript Names + # + ################################################### + + def _query_transcript_names(self, property_name: str, value: str) -> List[str]: ... 
+ def transcript_names( + self, contig: Optional[str] = None, strand: Optional[Literal["+", "-"]] = None + ) -> List[str]: ... + def transcript_names_of_gene_name(self, gene_name: str) -> List[str]: ... + def transcript_name_of_transcript_id(self, transcript_id: str) -> str: ... + + ################################################### + # + # Transcript IDs + # + ################################################### + + def _query_transcript_ids( + self, + property_name: str, + value: str, + feature: Literal["transcript", "CDS"] = "transcript", + ) -> List[str]: ... + def transcript_ids( + self, contig: Optional[str] = None, strand: Optional[Literal["+", "-"]] = None + ) -> List[str]: ... + def transcript_ids_of_gene_id(self, gene_id: str) -> List[str]: ... + def transcript_ids_of_gene_name(self, gene_name: str) -> List[str]: ... + def transcript_ids_of_transcript_name(self, transcript_name: str) -> List[str]: ... + def transcript_ids_of_exon_id(self, exon_id: str) -> List[str]: ... + def transcript_id_of_protein_id(self, protein_id: str) -> str: ... + + ################################################### + # + # Exon Info Objects + # + ################################################### + + def exons( + self, contig: Optional[str] = None, strand: Optional[Literal["+", "-"]] = None + ) -> List[Exon]: ... + def exon_by_id(self, exon_id: str) -> Exon: ... + + ################################################### + # + # Exon IDs + # + ################################################### + + def _query_exon_ids(self, property_name: str, value: str) -> List[str]: ... + def exon_ids( + self, contig: Optional[str] = None, strand: Optional[Literal["+", "-"]] = None + ) -> List[str]: ... + def exon_ids_of_gene_id(self, gene_id: str) -> List[str]: ... + def exon_ids_of_gene_name(self, gene_name: str) -> List[str]: ... + def exon_ids_of_transcript_name(self, transcript_name: str) -> List[str]: ... 
+ def exon_ids_of_transcript_id(self, transcript_id: str) -> List[str]: ... + def protein_ids( + self, contig: Optional[str] = None, strand: Optional[Literal["+", "-"]] = None + ) -> List[str]: ... diff --git a/pyensembl/locus.pyi b/pyensembl/locus.pyi new file mode 100644 index 0000000..06223b5 --- /dev/null +++ b/pyensembl/locus.pyi @@ -0,0 +1,63 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import Dict, Literal, Optional, Union +from serializable import Serializable + +class Locus(Serializable): + def __init__( + self, contig: str, start: int, end: int, strand: Literal["+", "-"] + ) -> None: ... + def __str__(self) -> str: ... + def __len__(self) -> int: ... + def __eq__(self, other) -> bool: ... + def to_tuple(self) -> tuple[str, int, int, Literal["+", "-"]]: ... + def __lt__(self, other) -> bool: ... + def __le__(self, other) -> bool: ... + def __gt__(self, other) -> bool: ... + def __ge__(self, other) -> bool: ... + def to_dict(self) -> Dict[str, Union[str, int]]: ... + @property + def length(self) -> int: ... + def offset(self, position: int) -> int: ... + def offset_range(self, start: int, end: int) -> tuple[int, int]: ... + def on_contig(self, contig: Union[str, int]) -> bool: ... + def on_strand(self, strand: Literal["+", "-", 1, -1, "+1", "-1"]) -> bool: ... + @property + def on_forward_strand(self) -> bool: ... + @property + def on_positive_strand(self) -> bool: ... + @property + def on_backward_strand(self) -> bool: ... 
+ @property + def on_negative_strand(self) -> bool: ... + def can_overlap( + self, contig: Union[str, int], strand: Optional[Literal["+", "-"]] = None + ) -> bool: ... + def distance_to_interval(self, start: int, end: int) -> int: ... + def distance_to_locus(self, other: "Locus") -> Union[int, float]: ... + def overlaps( + self, + contig: Union[str, int], + start: int, + end: int, + strand: Optional[Literal["+", "-"]] = None, + ) -> bool: ... + def overlaps_locus(self, other_locus: "Locus") -> bool: ... + def contains( + self, + contig: Union[str, int], + start: int, + end: int, + strand: Optional[Literal["+", "-"]] = None, + ) -> bool: ... + def contains_locus(self, other_locus: "Locus") -> bool: ... diff --git a/pyensembl/locus_with_genome.pyi b/pyensembl/locus_with_genome.pyi new file mode 100644 index 0000000..519e7b5 --- /dev/null +++ b/pyensembl/locus_with_genome.pyi @@ -0,0 +1,31 @@ +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from typing import TYPE_CHECKING, Dict, Literal, Union +from .locus import Locus + +if TYPE_CHECKING: + from .genome import Genome + +class LocusWithGenome(Locus): + def __init__( + self, + contig: str, + start: int, + end: int, + strand: Literal["+", "-"], + biotype: str, + genome: Genome, + ) -> None: ... + def to_dict(self) -> Dict[str, Union[str, int, Dict]]: ... + @property + def is_protein_coding(self) -> bool: ... 
diff --git a/pyensembl/normalization.pyi b/pyensembl/normalization.pyi
new file mode 100644
index 0000000..dd85bb2
--- /dev/null
+++ b/pyensembl/normalization.pyi
@@ -0,0 +1,18 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Any, Dict, Literal, Union
+
+NORMALIZE_CHROMOSOME_CACHE: Dict[Union[str, int], str] = {}
+
+def normalize_chromosome(c: Union[str, int, Any]) -> str: ...
+def normalize_strand(strand: Union[str, int]) -> Literal["+", "-"]: ...
diff --git a/pyensembl/reference_name.pyi b/pyensembl/reference_name.pyi
new file mode 100644
index 0000000..34f3527
--- /dev/null
+++ b/pyensembl/reference_name.pyi
@@ -0,0 +1,29 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import TYPE_CHECKING
+
+if TYPE_CHECKING:
+    from .ensembl_release import EnsemblRelease
+    from .species import Species
+
+def normalize_reference_name(name: str) -> str: ...
+def find_species_by_reference(reference_name: str) -> Species: ...
+def which_reference(species_name: str, ensembl_release: int) -> str: ...
+def max_ensembl_release(reference_name: str) -> int: ...
+def genome_for_reference_name(
+    reference_name: str, allow_older_downloaded_release: bool = True
+) -> EnsemblRelease: ...
+
+ensembl_grch36: EnsemblRelease = ...
+ensembl_grch37: EnsemblRelease = ...
+ensembl_grch38: EnsemblRelease = ...
diff --git a/pyensembl/search.pyi b/pyensembl/search.pyi
new file mode 100644
index 0000000..47552a4
--- /dev/null
+++ b/pyensembl/search.pyi
@@ -0,0 +1,31 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import TYPE_CHECKING, Iterable, Optional, Tuple
+
+if TYPE_CHECKING:
+    from .locus import Locus
+
+def find_nearest_locus(start: int, end: int, loci: Iterable[Locus]) -> Tuple[float, Optional[Locus]]:
+    """
+    Finds nearest locus (object with method `distance_to_interval`) to the
+    interval defined by the given `start` and `end` positions.
+    Returns the distance to that locus, along with the locus object itself.
+    """
+    best_distance = float("inf")
+    best_locus = None
+    for locus in loci:
+        distance = locus.distance_to_interval(start, end)
+        if best_distance > distance:
+            best_distance = distance
+            best_locus = locus
+    return best_distance, best_locus
diff --git a/pyensembl/sequence_data.pyi b/pyensembl/sequence_data.pyi
new file mode 100644
index 0000000..444f5da
--- /dev/null
+++ b/pyensembl/sequence_data.pyi
@@ -0,0 +1,38 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import TYPE_CHECKING, Dict, List, Optional
+
+if TYPE_CHECKING:
+    import logging
+
+logger: logging.Logger = ...
+
+class SequenceData(object):
+    def __init__(
+        self, fasta_paths: List[str], cache_directory_path: Optional[str] = None
+    ) -> None: ...
+    def _init_lazy_fields(self) -> None: ...
+    def clear_cache(self) -> None: ...
+    def __str__(self) -> str: ...
+    def __repr__(self) -> str: ...
+    def __contains__(self, sequence_id: str) -> bool: ...
+    def __eq__(self, other: object) -> bool: ...
+    def __hash__(self) -> int: ...
+    def _add_to_fasta_dictionary(
+        self, fasta_dictionary_tmp: Dict[str, str]
+    ) -> None: ...
+    def _load_or_create_fasta_dictionary_pickle(self) -> None: ...
+    def index(self, overwrite: bool = False) -> None: ...
+    @property
+    def fasta_dictionary(self) -> Dict[str, str]: ...
+    def get(self, sequence_id: str) -> str: ...
diff --git a/pyensembl/species.pyi b/pyensembl/species.pyi
new file mode 100644
index 0000000..d00dcb3
--- /dev/null
+++ b/pyensembl/species.pyi
@@ -0,0 +1,237 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import Dict, Generator, List, Tuple, Union
+
+from serializable import Serializable
+
+from .ensembl_versions import MAX_ENSEMBL_RELEASE, MAX_PLANTS_ENSEMBL_RELEASE
+
+class Species(Serializable):
+    _latin_names_to_species: dict[str, Species] = {}
+    _common_names_to_species: dict[str, Species] = {}
+    _reference_names_to_species: dict[str, Species] = {}
+
+    @classmethod
+    def register(
+        cls,
+        latin_name: str,
+        synonyms: List[str],
+        reference_assemblies: Dict[str, Tuple[int, int]],
+        is_plant: bool = False,
+    ) -> Species: ...
+    @classmethod
+    def all_registered_latin_names(cls) -> List[str]: ...
+    @classmethod
+    def all_species_release_pairs(cls) -> Generator[Tuple[str, int], None, None]: ...
+    def __init__(
+        self,
+        latin_name: str,
+        synonyms: list[str] = [],
+        reference_assemblies: dict[str, tuple[int, int]] = {},
+        is_plant: bool = False,
+    ) -> None: ...
+    def which_reference(self, ensembl_release: int) -> str: ...
+    def __str__(self) -> str: ...
+    def __eq__(self, other: object) -> bool: ...
+    def to_dict(self) -> Dict[str, str]: ...
+    @classmethod
+    def from_dict(cls, state_dict: Dict[str, str]) -> Species: ...
+    def __hash__(self) -> int: ...
+
+def normalize_species_name(name: str) -> str: ...
+def find_species_by_name(species_name: str) -> Species: ...
+def check_species_object(species_name_or_object: Union[str, Species]) -> Species: ...
+
+human = Species.register(
+    latin_name="homo_sapiens",
+    synonyms=["human"],
+    reference_assemblies={
+        "GRCh38": (76, MAX_ENSEMBL_RELEASE),
+        "GRCh37": (55, 75),
+        "NCBI36": (54, 54),
+    },
+)
+
+mouse = Species.register(
+    latin_name="mus_musculus",
+    synonyms=["mouse", "house mouse"],
+    reference_assemblies={
+        "NCBIM37": (54, 67),
+        "GRCm38": (68, 102),
+        "GRCm39": (103, MAX_ENSEMBL_RELEASE),
+    },
+)
+
+dog = Species.register(
+    latin_name="canis_familiaris",
+    synonyms=["dog"],
+    reference_assemblies={"CanFam3.1": (75, MAX_ENSEMBL_RELEASE)},
+)
+
+cat = Species.register(
+    latin_name="felis_catus",
+    synonyms=["cat"],
+    reference_assemblies={
+        "Felis_catus_6.2": (75, 90),
+        "Felis_catus_8.0": (91, 92),
+        "Felis_catus_9.0": (93, MAX_ENSEMBL_RELEASE),
+    },
+)
+
+chicken = Species.register(
+    latin_name="gallus_gallus",
+    synonyms=["chicken"],
+    reference_assemblies={
+        "Galgal4": (75, 85),
+        "Gallus_gallus-5.0": (86, MAX_ENSEMBL_RELEASE),
+    },
+)
+
+# Does the black rat (Rattus Rattus) get used for research too?
+brown_rat = Species.register(
+    latin_name="rattus_norvegicus",
+    synonyms=["brown rat", "lab rat", "rat"],
+    reference_assemblies={
+        "Rnor_5.0": (75, 79),
+        "Rnor_6.0": (80, 104),
+        "mRatBN7.2": (105, MAX_ENSEMBL_RELEASE),
+    },
+)
+
+macaque = Species.register(
+    latin_name="macaca_fascicularis",
+    synonyms=["macaque", "Crab-eating macaque"],
+    reference_assemblies={
+        "Macaca_fascicularis_6.0": (103, MAX_ENSEMBL_RELEASE),
+    },
+)
+
+green_monkey = Species.register(
+    latin_name="chlorocebus_sabaeus",
+    synonyms=["green_monkey", "african_green_monkey"],
+    reference_assemblies={
+        "ChlSab1.1": (86, MAX_ENSEMBL_RELEASE),
+    },
+)
+
+rhesus = Species.register(
+    latin_name="macaca_mulatta",
+    synonyms=["rhesus"],
+    reference_assemblies={"Mmul_10": (75, MAX_ENSEMBL_RELEASE)},
+)
+
+rabbit = Species.register(
+    latin_name="oryctolagus_cuniculus",
+    synonyms=["rabbit"],
+    reference_assemblies={"OryCun2.0": (75, MAX_ENSEMBL_RELEASE)},
+)
+
+gerbil = Species.register(
+    latin_name="meriones_unguiculatus",
+    synonyms=["gerbil"],
+    reference_assemblies={"MunDraft-v1.0": (75, MAX_ENSEMBL_RELEASE)},
+)
+
+syrian_hamster = Species.register(
+    latin_name="mesocricetus_auratus",
+    synonyms=["syrian_hamster"],
+    reference_assemblies={"MesAur1.0": (75, MAX_ENSEMBL_RELEASE)},
+)
+
+chinese_hamster = Species.register(
+    latin_name="cricetulus_griseus_chok1gshd",
+    synonyms=["chinese_hamster"],
+    reference_assemblies={"CHOK1GS_HDv1": (75, MAX_ENSEMBL_RELEASE)},
+)
+
+naked_mole_rat = Species.register(
+    latin_name="heterocephalus_glaber_female",
+    synonyms=["naked_mole_rat"],
+    reference_assemblies={"HetGla_female_1.0": (75, MAX_ENSEMBL_RELEASE)},
+)
+
+guinea_pig = Species.register(
+    latin_name="cavia_porcellus",
+    synonyms=["guinea_pig"],
+    reference_assemblies={"Cavpor3.0": (75, MAX_ENSEMBL_RELEASE)},
+)
+
+pig = Species.register(
+    latin_name="sus_scrofa",
+    synonyms=["pig"],
+    reference_assemblies={"Sscrofa11.1": (75, MAX_ENSEMBL_RELEASE)},
+)
+
+zebrafish = Species.register(
+    latin_name="danio_rerio",
+    synonyms=["zebrafish"],
+    reference_assemblies={
+        "ZFISH7": (47, 53),
+        "Zv8": (54, 59),
+        "Zv9": (60, 79),
+        "GRCz10": (80, 91),
+        "GRCz11": (92, MAX_ENSEMBL_RELEASE),
+    },
+)
+
+fly = Species.register(
+    latin_name="drosophila_melanogaster",
+    synonyms=["drosophila", "fruit fly", "fly"],
+    reference_assemblies={
+        "BDGP5": (75, 78),
+        "BDGP6": (79, 95),
+        "BDGP6.22": (96, 98),
+        "BDGP6.28": (99, 102),
+        "BDGP6.32": (103, MAX_ENSEMBL_RELEASE),
+    },
+)
+
+nematode = Species.register(
+    latin_name="caenorhabditis_elegans",
+    synonyms=["nematode", "C_elegans"],
+    reference_assemblies={
+        "WS180": (47, 49),
+        "WS190": (50, 54),
+        "WS200": (55, 57),
+        "WS210": (58, 59),
+        "WS220": (61, 66),
+        "WBcel215": (67, 70),
+        "WBcel235": (71, MAX_ENSEMBL_RELEASE),
+    },
+)
+
+yeast = Species.register(
+    latin_name="saccharomyces_cerevisiae",
+    synonyms=["yeast", "budding_yeast"],
+    reference_assemblies={
+        "R64-1-1": (76, MAX_ENSEMBL_RELEASE),
+    },
+)
+
+arabidopsis_thaliana = Species.register(
+    latin_name="arabidopsis_thaliana",
+    synonyms=["arabidopsis"],
+    reference_assemblies={
+        "TAIR10": (40, MAX_PLANTS_ENSEMBL_RELEASE),
+    },
+    is_plant=True,
+)
+
+rice = Species.register(
+    latin_name="oryza_sativa",
+    synonyms=["rice"],
+    reference_assemblies={
+        "IRGSP-1.0": (40, MAX_PLANTS_ENSEMBL_RELEASE),
+    },
+    is_plant=True,
+)
diff --git a/pyensembl/transcript.pyi b/pyensembl/transcript.pyi
new file mode 100644
index 0000000..3634b6b
--- /dev/null
+++ b/pyensembl/transcript.pyi
@@ -0,0 +1,110 @@
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from typing import TYPE_CHECKING, List, Literal, Optional, Tuple, Union
+
+from memoized_property import memoized_property
+
+from .common import memoize
+from .locus_with_genome import LocusWithGenome
+
+if TYPE_CHECKING:
+    from .exon import Exon
+    from .gene import Gene
+    from .genome import Genome
+
+class Transcript(LocusWithGenome):
+    def __init__(
+        self,
+        transcript_id: str,
+        transcript_name: str,
+        contig: str,
+        start: int,
+        end: int,
+        strand: Literal["+", "-"],
+        biotype: str,
+        gene_id: str,
+        genome: Genome,
+        support_level: Optional[int] = None,
+    ) -> None: ...
+    @property
+    def id(self) -> str: ...
+    @property
+    def name(self) -> str: ...
+    def __str__(self) -> str: ...
+    def __len__(self) -> int: ...
+    def __eq__(self, other: object) -> bool: ...
+    def __hash__(self) -> int: ...
+    def to_dict(self) -> dict[str, Union[str, int]]: ...
+    @property
+    def gene(self) -> Gene: ...
+    @property
+    def gene_name(self) -> str: ...
+    @property
+    def exons(self) -> List[Exon]: ...
+
+    _TRANSCRIPT_FEATURES: set[str] = {"start_codon", "stop_codon", "UTR", "CDS"}
+
+    @memoize
+    def _transcript_feature_position_ranges(
+        self, feature: Literal["start_codon", "stop_codon", "UTR", "CDS"], required: bool = True
+    ) -> List[Tuple[int, int]]: ...
+    @memoize
+    def _transcript_feature_positions(
+        self, feature: Literal["start_codon", "stop_codon", "UTR", "CDS"]
+    ) -> List[int]: ...
+    @memoize
+    def _codon_positions(
+        self, feature: Literal["start_codon", "stop_codon"]
+    ) -> List[int]: ...
+    @memoized_property
+    def contains_start_codon(self) -> bool: ...
+    @memoized_property
+    def contains_stop_codon(self) -> bool: ...
+    @memoized_property
+    def start_codon_complete(self) -> bool: ...
+    @memoized_property
+    def start_codon_positions(self) -> List[int]: ...
+    @memoized_property
+    def stop_codon_positions(self) -> List[int]: ...
+    @memoized_property
+    def exon_intervals(self) -> List[Tuple[int, int]]: ...
+    def spliced_offset(self, position: int) -> int: ...
+    @memoized_property
+    def start_codon_unspliced_offsets(self) -> List[int]: ...
+    @memoized_property
+    def stop_codon_unspliced_offsets(self) -> List[int]: ...
+    def _contiguous_offsets(self, offsets: list[int]) -> list[int]: ...
+    @memoized_property
+    def start_codon_spliced_offsets(self) -> List[int]: ...
+    @memoized_property
+    def stop_codon_spliced_offsets(self) -> List[int]: ...
+    @memoized_property
+    def coding_sequence_position_ranges(self) -> List[Tuple[int, int]]: ...
+    @memoized_property
+    def complete(self) -> bool: ...
+    @memoized_property
+    def sequence(self) -> str: ...
+    @memoized_property
+    def first_start_codon_spliced_offset(self) -> int: ...
+    @memoized_property
+    def last_stop_codon_spliced_offset(self) -> int: ...
+    @memoized_property
+    def coding_sequence(self) -> str: ...
+    @memoized_property
+    def five_prime_utr_sequence(self) -> str: ...
+    @memoized_property
+    def three_prime_utr_sequence(self) -> str: ...
+    @memoized_property
+    def protein_id(self) -> Optional[str]: ...
+    @memoized_property
+    def protein_sequence(self) -> Optional[str]: ...
diff --git a/pyensembl/version.pyi b/pyensembl/version.pyi
new file mode 100644
index 0000000..05687af
--- /dev/null
+++ b/pyensembl/version.pyi
@@ -0,0 +1,6 @@
+__version__: str = ...
+
+def print_version() -> None: ...
+
+if __name__ == "__main__":
+    ...
From 20295eb414b21da0bf154172a6ed56a226efaeb3 Mon Sep 17 00:00:00 2001
From: akmazian
Date: Tue, 4 Jun 2024 16:11:32 -0700
Subject: [PATCH 2/2] logistics

---
 MANIFEST.in        | 2 ++
 pyensembl/py.typed | 0
 2 files changed, 2 insertions(+)
 create mode 100644 pyensembl/py.typed

diff --git a/MANIFEST.in b/MANIFEST.in
index 04f196a..e14d269 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -1,2 +1,4 @@
 include README.md
 include LICENSE
+include pyensembl/*.pyi
+include pyensembl/py.typed
diff --git a/pyensembl/py.typed b/pyensembl/py.typed
new file mode 100644
index 0000000..e69de29