diff --git a/civicpy/civic.py b/civicpy/civic.py index 1362a48..2e958b6 100644 --- a/civicpy/civic.py +++ b/civicpy/civic.py @@ -7,16 +7,19 @@ import os from pathlib import Path from collections import defaultdict, namedtuple -from civicpy import REMOTE_CACHE_URL, LOCAL_CACHE_PATH, CACHE_TIMEOUT_DAYS import requests -from civicpy import exports import deprecation -from civicpy.__version__ import __version__ from datetime import datetime, timedelta from backports.datetime_fromisoformat import MonkeyPatch MonkeyPatch.patch_fromisoformat() import re +from civicpy import REMOTE_CACHE_URL, LOCAL_CACHE_PATH, CACHE_TIMEOUT_DAYS +from civicpy.__version__ import __version__ +from civicpy import exports +from civicpy import graphql_payloads +from civicpy import utils + CACHE = dict() @@ -35,10 +38,11 @@ LINKS_URL = 'https://civicdb.org/links' -UNMARKED_PLURALS = {'evidence'} CIVIC_TO_PYCLASS = { - 'evidence_items': 'evidence' + 'evidence_items': 'evidence', + 'five_prime_coordinates': 'coordinates', + 'three_prime_ccodinates': 'coordinates', } @@ -62,35 +66,6 @@ class CoordinateQuery(_CoordinateQuery): # Wrapping for documentation pass -def pluralize(string): - if string in UNMARKED_PLURALS: - return '{}_items'.format(string) - if string.endswith('s'): - return string - return string + 's' - - -def singularize(string): - string = string.rstrip('s') - if string == 'evidence_item': - string = 'evidence' - return string - - -def search_url(element, use_search_meta): - element = pluralize(element).lower() - components = [API_URL, element] - if use_search_meta: - components.append('search') - return '/'.join(components) - - -def snake_to_camel(snake_string): - words = snake_string.split('_') - cap_words = [x.capitalize() for x in words] - return ''.join(cap_words) - - def element_lookup_by_id(element_type, element_id): e = _request_by_ids(element_type, [int(element_id)])[0] e = _postprocess_response_element(e, element_type) @@ -98,8 +73,8 @@ def element_lookup_by_id(element_type, 
element_id): def get_class(element_type): - e_string = singularize(element_type) - class_string = snake_to_camel(e_string) + e_string = utils.singularize(element_type) + class_string = utils.snake_to_camel(e_string) cls = getattr(MODULE, class_string, CivicAttribute) return cls @@ -195,14 +170,14 @@ def load_cache(local_cache_path=LOCAL_CACHE_PATH, on_stale='auto'): with open(local_cache_path, 'rb') as pf: loaded_cache = pickle.load(pf) c = dict() - variants = set() + variants_with_coords = set() for k, v in loaded_cache.items(): if isinstance(k, str): c[k] = v elif isinstance(k, int): c[hash(v)] = v - if v.type == 'variant': - variants.add(v) + if isinstance(v, GeneVariant) or isinstance(v, FusionVariant): + variants_with_coords.add(v) else: raise ValueError old_cache = MODULE.CACHE @@ -212,7 +187,7 @@ def load_cache(local_cache_path=LOCAL_CACHE_PATH, on_stale='auto'): continue v.update() if _has_full_cached_fresh() or on_stale == 'ignore': - _build_coordinate_table(variants) + _build_coordinate_table(variants_with_coords) return True elif (on_stale == 'auto' and local_cache_path == LOCAL_CACHE_PATH) or on_stale == 'update': MODULE.CACHE = old_cache @@ -267,18 +242,36 @@ def update_cache(from_remote_cache=True, remote_cache_url=REMOTE_CACHE_URL, else: molecular_profiles = _get_elements_by_ids('molecular_profile', allow_cached=False, get_all=True) genes = _get_elements_by_ids('gene', allow_cached=False, get_all=True) + factors = _get_elements_by_ids('factor', allow_cached=False, get_all=True) + fusions = _get_elements_by_ids('fusion', allow_cached=False, get_all=True) variants = _get_elements_by_ids('variant', allow_cached=False, get_all=True) evidence = _get_elements_by_ids('evidence', allow_cached=False, get_all=True) assertions = _get_elements_by_ids('assertion', allow_cached=False, get_all=True) variant_groups = _get_elements_by_ids('variant_group', allow_cached=False, get_all=True) + sources = _get_elements_by_ids('source', allow_cached=False, get_all=True) + 
diseases = _get_elements_by_ids('disease', allow_cached=False, get_all=True) + therapies = _get_elements_by_ids('therapy', allow_cached=False, get_all=True) + phenotypes = _get_elements_by_ids('phenotype', allow_cached=False, get_all=True) for e in evidence: e.assertions = [a for a in assertions if a.id in e.assertion_ids] + e.therapies = [t for t in therapies if t.id in e.therapy_ids] e._partial = False CACHE[hash(e)] = e for g in genes: - g.variants = [v for v in variants if v.gene_id == g.id] + g.sources = [s for s in sources if s.id in g.source_ids] + g.variants = [v for v in variants if v.feature_id == g.id] g._partial = False CACHE[hash(g)] = g + for f in factors: + f.sources = [s for s in sources if s.id in f.source_ids] + f.variants = [v for v in variants if v.feature_id == f.id] + f._partial = False + CACHE[hash(f)] = f + for f in fusions: + f.sources = [s for s in sources if s.id in f.source_ids] + f.variants = [v for v in variants if v.feature_id == f.id] + f._partial = False + CACHE[hash(f)] = f for v in variants: v.variant_groups = [vg for vg in variant_groups if v.id in vg.variant_ids] v.molecular_profiles = [mp for mp in molecular_profiles if v.id in mp.variant_ids] @@ -286,17 +279,59 @@ def update_cache(from_remote_cache=True, remote_cache_url=REMOTE_CACHE_URL, CACHE[hash(v)] = v for a in assertions: a.evidence_items = [e for e in evidence if e.id in a.evidence_ids] + a.therapies = [t for t in therapies if t.id in a.therapy_ids] + a._partial = False CACHE[hash(a)] = a for vg in variant_groups: + vg.sources = [s for s in sources if s.id in vg.source_ids] vg.variants = [v for v in variants if v.id in vg.variant_ids] vg._partial = False CACHE[hash(vg)] = vg for mp in molecular_profiles: + mp.sources = [s for s in sources if s.id in mp.source_ids] mp.evidence_items = [e for e in evidence if e.molecular_profile_id == mp.id] mp.variants = [v for v in variants if v.id in mp.variant_ids] mp.assertions = [a for a in assertions if a.molecular_profile_id == 
mp.id] + updated_parsed_name = [] + for pn in mp.parsed_name: + if pn.type == 'Feature': + if pn.featureType == 'GENE': + pn = [g for g in genes if g.id == pn.id][0] + elif pn.featureType == 'FACTOR': + pn = [f for f in factors if f.id == pn.id][0] + elif pn.featureType == 'FUSION': + pn = [f for f in fusions if f.id == pn.id][0] + elif pn.type == 'Variant': + pn = [v for v in variants if v.id == pn.id][0] + else: + pn = pn.text + updated_parsed_name.append(pn) + mp.parsed_name = updated_parsed_name mp._partial = False CACHE[hash(mp)] = mp + for s in sources: + s.evidence_items = [e for e in evidence if s.id == e.source_id] + s.genes = [g for g in genes if s.id in g.source_ids] + s.factors = [f for f in factors if s.id in f.source_ids] + s.fusions = [f for f in fusions if s.id in f.source_ids] + s.molecular_profiles = [m for m in molecular_profiles if s.id in m.source_ids] + s._partial = False + CACHE[hash(s)] = s + for d in diseases: + d.evidence_items = [e for e in evidence if d.id == e.disease_id] + d.assertions = [a for a in assertions if d.id == a.disease_id] + d._partial = False + CACHE[hash(d)] = d + for t in therapies: + t.evidence_items = [e for e in evidence if t.id in e.therapy_ids] + t.assertions = [a for a in assertions if t.id in a.therapy_ids] + t._partial = False + CACHE[hash(t)] = t + for p in phenotypes: + p.evidence_items = [e for e in evidence if p.id in e.phenotype_ids] + p.assertions = [a for a in assertions if p.id in a.phenotype_ids] + p._partial = False + CACHE[hash(p)] = p CACHE['full_cached'] = datetime.now() _build_coordinate_table(variants) save_cache(local_cache_path=local_cache_path) @@ -308,6 +343,51 @@ def _make_local_cache_path_if_missing(local_cache_path): os.makedirs(p.parent) +def _build_coordinate_table(variants): + variant_records = list() + for v in variants: + if isinstance(v, GeneVariant): + c = v.coordinates + start = getattr(c, 'start', None) + stop = getattr(c, 'stop', None) + chr = getattr(c, 'chromosome', None) + alt = 
getattr(c, 'variant_bases', None) + ref = getattr(c, 'reference_bases', None) + if all([start, stop, chr]): + variant_records.append([chr, start, stop, alt, ref, hash(v)]) + else: + continue + if isinstance(v, FusionVariant): + c = v.five_prime_coordinates + start = getattr(c, 'start', None) + stop = getattr(c, 'stop', None) + chr = getattr(c, 'chromosome', None) + alt = getattr(c, 'variant_bases', None) + ref = getattr(c, 'reference_bases', None) + if all([start, stop, chr]): + variant_records.append([chr, start, stop, alt, ref, hash(v)]) + else: + continue + c = v.three_prime_coordinates + start = getattr(c, 'start', None) + stop = getattr(c, 'stop', None) + chr = getattr(c, 'chromosome', None) + alt = getattr(c, 'variant_bases', None) + ref = getattr(c, 'reference_bases', None) + if all([start, stop, chr]): + variant_records.append([chr, start, stop, alt, ref, hash(v)]) + else: + continue + df = pd.DataFrame.from_records( + variant_records, + columns=['chr', 'start', 'stop', 'alt', 'ref', 'v_hash'] + ).sort_values(by=['chr', 'start', 'stop', 'alt', 'ref']) + MODULE.COORDINATE_TABLE = df + MODULE.COORDINATE_TABLE_START = df.start.sort_values() + MODULE.COORDINATE_TABLE_STOP = df.stop.sort_values() + MODULE.COORDINATE_TABLE_CHR = df.chr.sort_values() + + class CivicRecord: """ As a base class, :class:`CivicRecord` is used to define the characteristic of all records in CIViC. This class is not @@ -325,7 +405,7 @@ def __init__(self, partial=False, **kwargs): of the object attributes (except ``type``) must be specified as keyword arguments, or the ``partial`` parameter must be set to **True**. If ``partial`` is set to **True**, the ``id`` keyword argument is still required. - Users are encouraged to use the functions for `getting records`_ in lieu of directly initializing record + Users are encouraged to use the functions for :ref:`getting_records` in lieu of directly initializing record objects. 
:param bool partial: Indicates whether the the set of object attributes passed is incomplete. If set to **True** the ``id`` keyword is required. @@ -363,7 +443,7 @@ def __init__(self, partial=False, **kwargs): result = list() for data in v: if isinstance(data, dict): - data['type'] = data.get('type', singularize(field)) + data['type'] = data.get('type', utils.singularize(field)) result.append(cls(partial=True, **data)) else: result.append(data) @@ -409,7 +489,7 @@ def update(self, allow_partial=True, force=False, **kwargs): :class:`CivicRecord` instance. :param bool allow_partial: Flag to indicate whether the record will be updated according to the contents of CACHE, without requiring all attributes to be assigned. - :param bool force: Flag to indicate whether to force an update from the server, even if a full record ecists in the cache. + :param bool force: Flag to indicate whether to force an update from the server, even if a full record exists in the cache. :return: True if record is complete after update, else False. 
""" if kwargs: @@ -433,12 +513,14 @@ def site_link(self): """Returns a URL to the record on the CIViC web application.""" return '/'.join([LINKS_URL, self.type, str(self.id)]) + class MolecularProfile(CivicRecord): _SIMPLE_FIELDS = CivicRecord._SIMPLE_FIELDS.union({ 'description', 'molecular_profile_score', 'name', 'variant_ids', + 'source_ids', }) _COMPLEX_FIELDS = CivicRecord._COMPLEX_FIELDS.union({ 'aliases', @@ -446,24 +528,21 @@ class MolecularProfile(CivicRecord): 'evidence_items', 'sources', 'variants', - 'parsed_name' + 'parsed_name', }) def __init__(self, **kwargs): self._evidence_items = [] self._assertions = [] self._variants = [] - - # Convert parsed name types from camel to snake case - parsed_name = kwargs.get('parsed_name') - if parsed_name: - for pn in parsed_name: - pn['type'] = ''.join(['_' + c.lower() if c.isupper() else c for c in pn['type']]).lstrip('_') - + self._sources = [] super().__init__(**kwargs) @property def evidence_sources(self): + """ + A list of :class:`Source` records associated with all the :class:`Evidence` records under this molecular profile. + """ sources = set() for evidence in self.evidence_items: if evidence.source is not None: @@ -472,18 +551,27 @@ def evidence_sources(self): @property def summary(self): + """ + A shorthand for the description. + """ return self.description - @summary.setter - def summary(self, value): - self.description = value + #@summary.setter + #def summary(self, value): + # self.description = value @property def evidence(self): + """ + A shorthand for evidence_items. + """ return self.evidence_items @property def evidence_items(self): + """ + A list of :class:`Evidence` records associated with this molecular profile. + """ return [e for e in self._evidence_items if e.status in self._include_status] @evidence_items.setter @@ -492,6 +580,9 @@ def evidence_items(self, value): @property def assertions(self): + """ + A list of :class:`Assertion` records associated with this molecular profile. 
+ """ return [a for a in self._assertions if a.status in self._include_status] @assertions.setter @@ -500,12 +591,26 @@ def assertions(self, value): @property def variants(self): + """ + A list of :class:`Variant` objects involved in this molecular profile. + """ return self._variants @variants.setter def variants(self, value): self._variants = value + @property + def sources(self): + """ + A list of :class:`Source` objects involved in this molecular profile. + """ + return self._sources + + @sources.setter + def sources(self, value): + self._sources = value + def sanitized_name(self): name = self.name words = [] @@ -520,51 +625,58 @@ def sanitized_name(self): class Variant(CivicRecord): _SIMPLE_FIELDS = CivicRecord._SIMPLE_FIELDS.union({ - 'allele_registry_id', - 'gene_id', + 'subtype', + 'feature_id', 'name', 'single_variant_molecular_profile_id', - 'entrez_name', - 'entrez_id' }) _COMPLEX_FIELDS = CivicRecord._COMPLEX_FIELDS.union({ - 'clinvar_entries', - 'coordinates', # 'errors', - 'hgvs_expressions', #'lifecycle_actions', # 'provisional_values', 'variant_aliases', 'variant_groups', - 'variant_types'}) + 'variant_types' + }) def __init__(self, **kwargs): - # Handle overloaded evidence_items from some advanced search views kwargs['type'] = 'variant' self._variant_groups = [] self._molecular_profiles = [] - coordinates = kwargs.get('coordinates') - if coordinates: - if coordinates.get('reference_bases') in ['', '-']: - coordinates['reference_bases'] = None - if coordinates.get('variant_bases') in ['', '-']: - coordinates['variant_bases'] = None super().__init__(**kwargs) + def __repr__(self): + return ''.format(self.type, self.subtype, self.id) + @property def aliases(self): + """ + A curated list of aliases by which this variant is referenced. Shorthand for the ``variant_aliases`` attribute. + """ return self.variant_aliases @property def groups(self): + """ + A list of :class:`VariantGroup` records to which this variant belongs.
Shorthand for the ``variant_groups`` attribute. + """ return self.variant_groups @property def types(self): + """ + A list of :class:`CivicAttribute` objects describing `variant types`_ from the `Sequence Ontology`_. + + .. _variant types: https://docs.civicdb.org/en/latest/model/variants/types.html + .. _Sequence Ontology: http://www.sequenceontology.org/ + """ return self.variant_types @property def variant_groups(self): + """ + A list of :class:`VariantGroup` records to which this variant belongs. + """ return self._variant_groups @variant_groups.setter @@ -573,32 +685,69 @@ def variant_groups(self, value): @property def molecular_profiles(self): + """ + A list of :class:`MolecularProfile` records involving this variant. + """ for mp in self._molecular_profiles: mp._include_status = self._include_status - return [mp for mp in self._molecular_profiles if mp.evidence] + return [m for m in self._molecular_profiles if m.evidence_items or m.assertions] @molecular_profiles.setter def molecular_profiles(self, value): self._molecular_profiles = value - @property - def gene(self): - return _get_element_by_id('gene', self.gene_id) - @property def single_variant_molecular_profile(self): + """ + The :class:`MolecularProfile` record representing the single variant on its own. + """ mp = _get_element_by_id('molecular_profile', self.single_variant_molecular_profile_id) mp._include_status = self._include_status return mp + +class GeneVariant(Variant): + _SIMPLE_FIELDS = Variant._SIMPLE_FIELDS.union({ + 'allele_registry_id', + 'entrez_name', + 'entrez_id' + }) + _COMPLEX_FIELDS = Variant._COMPLEX_FIELDS.union({ + 'clinvar_entries', + 'coordinates', + 'hgvs_expressions', + #'lifecycle_actions', + # 'provisional_values', + }) + + @property + def gene(self): + """ + The :class:`Gene` record this variant belongs to. + """ + return _get_element_by_id('gene', self.feature_id) + + @property + def feature(self): + """ + The :class:`Gene` feature this variant belongs to. 
+ """ + return self.gene + @property def is_insertion(self): + """ + Based on the coordinates, True if the variant is an insertion, else False. + """ ref = self.coordinates.reference_bases alt = self.coordinates.variant_bases return (ref is None and alt is not None) or (ref is not None and alt is not None and len(ref) < len(alt)) @property def is_deletion(self): + """ + Based on the coordinates, True if the variant is a deletion, else False. + """ ref = self.coordinates.reference_bases alt = self.coordinates.variant_bases if alt is not None and (alt == '-' or alt == ''): @@ -643,6 +792,7 @@ def vcf_coordinates(self): start = self.coordinates.start ext = "/sequence/region/human/{}:{}-{}".format(self.coordinates.chromosome, start, start) r = requests.get(ensembl_server+ext, headers={ "Content-Type" : "text/plain"}) + r.raise_for_status() if self.coordinates.reference_bases == None or self.coordinates.reference_bases == '-' or self.coordinates.reference_bases == '': ref = r.text else: @@ -655,6 +805,7 @@ def vcf_coordinates(self): start = self.coordinates.start - 1 ext = "/sequence/region/human/{}:{}-{}".format(self.coordinates.chromosome, start, start) r = requests.get(ensembl_server+ext, headers={ "Content-Type" : "text/plain"}) + r.raise_for_status() ref = "{}{}".format(r.text, self.coordinates.reference_bases) if self.coordinates.variant_bases == None or self.coordinates.variant_bases == '-' or self.coordinates.variant_bases == '': alt = r.text @@ -731,7 +882,7 @@ def csq(self, include_status=None): str(mp.id), '&'.join(map(lambda a: a.translate(special_character_table), mp.aliases)), "https://civicdb.org/links/molecular-profiles/{}".format(mp.id), - '&'.join(map(lambda e: e.translate(special_character_table), self.hgvs_expressions)), + '&'.join(map(lambda e: e.strip().translate(special_character_table), self.hgvs_expressions)), str(self.allele_registry_id), '&'.join(self.clinvar_entries), str(mp.molecular_profile_score), @@ -743,7 +894,7 @@ def csq(self,
include_status=None): evidence.status, str(evidence.significance or ''), str(evidence.evidence_direction or ''), - str(evidence.disease), + evidence.disease.name if evidence.disease is not None else "", '&'.join([str(therapy) for therapy in evidence.therapies]), str(evidence.therapy_interaction_type or ""), '&'.join(["{} (HPO ID {})".format(phenotype.name, phenotype.hpo_id) for phenotype in evidence.phenotypes]), @@ -775,7 +926,7 @@ def csq(self, include_status=None): str(mp.id), '&'.join(map(lambda a: a.translate(special_character_table), mp.aliases)), "https://civicdb.org/links/molecular-profiles/{}".format(mp.id), - '&'.join(map(lambda e: e.translate(special_character_table), self.hgvs_expressions)), + '&'.join(map(lambda e: e.strip().translate(special_character_table), self.hgvs_expressions)), str(self.allele_registry_id), '&'.join(self.clinvar_entries), str(mp.molecular_profile_score), @@ -801,10 +952,53 @@ def csq(self, include_status=None): ])) return csq +class FactorVariant(Variant): + _SIMPLE_FIELDS = Variant._SIMPLE_FIELDS.union({ + 'ncit_id', + }) + + @property + def factor(self): + """ + The :class:`Factor` record this variant belongs to. + """ + return _get_element_by_id('factor', self.feature_id) + + @property + def feature(self): + """ + The :class:`Factor` feature this variant belongs to. + """ + return self.factor + + +class FusionVariant(Variant): + _SIMPLE_FIELDS = Variant._SIMPLE_FIELDS.union({ + 'vicc_compliant_name', + }) + _COMPLEX_FIELDS = Variant._COMPLEX_FIELDS.union({ + 'five_prime_coordinates', + 'three_prime_coordinates', + }) + + @property + def fusion(self): + """ + The :class:`Fusion` record this variant belongs to. + """ + return _get_element_by_id('fusion', self.feature_id) + + @property + def feature(self): + """ + The :class:`Fusion` feature this variant belongs to. 
+ """ + return self.fusion + class VariantGroup(CivicRecord): _SIMPLE_FIELDS = CivicRecord._SIMPLE_FIELDS.union( - {'description', 'name', 'variant_ids'}) + {'description', 'name', 'variant_ids', 'source_ids'}) _COMPLEX_FIELDS = CivicRecord._COMPLEX_FIELDS.union({ # 'errors', # TODO: Add support for these fields in advanced search endpoint # 'lifecycle_actions', @@ -815,6 +1009,7 @@ class VariantGroup(CivicRecord): def __init__(self, **kwargs): self._variants = [] + self._sources = [] super().__init__(**kwargs) @property @@ -827,10 +1022,18 @@ def variants(self): def variants(self, value): self._variants = value + @property + def sources(self): + return self._sources + + @sources.setter + def sources(self, value): + self._sources = value + class Gene(CivicRecord): _SIMPLE_FIELDS = CivicRecord._SIMPLE_FIELDS.union( - {'description', 'entrez_id', 'name'}) + {'description', 'entrez_id', 'name', 'source_ids'}) _COMPLEX_FIELDS = CivicRecord._COMPLEX_FIELDS.union({ 'aliases', # 'errors', # TODO: Add support for these fields in advanced search endpoint @@ -838,16 +1041,18 @@ class Gene(CivicRecord): # 'provisional_values', 'sources', 'variants', - 'assertions', }) def __init__(self, **kwargs): self._variants = [] - self._assertions = [] + self._sources = [] super().__init__(**kwargs) @property def variants(self): + """ + A list of :class:`Variant` records associated with this gene. + """ for variant in self._variants: variant._include_status = self._include_status return [v for v in self._variants if v.molecular_profiles] @@ -857,120 +1062,328 @@ def variants(self, value): self._variants = value @property - def assertions(self): - return [a for a in self._assertions if a.status in self._include_status] + def sources(self): + """ + A list of :class:`Source` records associated with the gene description. 
+ """ + return self._sources - @assertions.setter - def assertions(self, value): - self._assertions = value + @sources.setter + def sources(self, value): + self._sources = value + + +class Factor(CivicRecord): + _SIMPLE_FIELDS = CivicRecord._SIMPLE_FIELDS.union( + {'description', 'ncit_id', 'name', 'full_name', 'source_ids'}) + _COMPLEX_FIELDS = CivicRecord._COMPLEX_FIELDS.union({ + 'aliases', + # 'errors', # TODO: Add support for these fields in advanced search endpoint + # /'lifecycle_actions', + # 'provisional_values', + 'sources', + 'variants', + }) + + def __init__(self, **kwargs): + self._variants = [] + self._sources = [] + super().__init__(**kwargs) + + @property + def variants(self): + """ + A list of :class:`Variant` records associated with this factor. + """ + for variant in self._variants: + variant._include_status = self._include_status + return [v for v in self._variants if v.molecular_profiles] + + @variants.setter + def variants(self, value): + self._variants = value + + @property + def sources(self): + """ + A list of :class:`Source` records associated with the factor description. + """ + return self._sources + + @sources.setter + def sources(self, value): + self._sources = value + + +class Fusion(CivicRecord): + _SIMPLE_FIELDS = CivicRecord._SIMPLE_FIELDS.union( + {'description', 'name', 'five_prime_partner_status', 'three_prime_partner_status', 'five_prime_gene_id', 'three_prime_gene_id', 'source_ids'}) + _COMPLEX_FIELDS = CivicRecord._COMPLEX_FIELDS.union({ + 'aliases', + # 'errors', # TODO: Add support for these fields in advanced search endpoint + # /'lifecycle_actions', + # 'provisional_values', + 'sources', + 'variants', + }) + + def __init__(self, **kwargs): + self._variants = [] + self._sources = [] + super().__init__(**kwargs) + + @property + def variants(self): + """ + A list of :class:`Variant` records associated with this fusion. 
+ """ + for variant in self._variants: + variant._include_status = self._include_status + return [v for v in self._variants if v.molecular_profiles] + + @variants.setter + def variants(self, value): + self._variants = value + + @property + def sources(self): + """ + A list of :class:`Source` records associated with the fusion description. + """ + return self._sources + + @sources.setter + def sources(self, value): + self._sources = value + + @property + def five_prime_gene(self): + """ + The :class:`Gene` record of the 5' fusion partner if that partner is ``KNOWN``. + """ + if self.five_prime_gene_id: + return get_gene_by_id(self.five_prime_gene_id) + else: + return None + + @property + def three_prime_gene(self): + """ + The :class:`Gene` record of the 3' fusion partner if that partner is ``KNOWN``. + """ + if self.three_prime_gene_id: + return get_gene_by_id(self.three_prime_gene_id) + else: + return None class Evidence(CivicRecord): _SIMPLE_FIELDS = CivicRecord._SIMPLE_FIELDS.union({ - 'significance', + 'assertion_ids', 'description', - 'therapy_interaction_type', + 'disease_id', 'evidence_direction', 'evidence_level', 'evidence_type', + 'molecular_profile_id', 'name', - # 'open_change_count', + 'phenotype_ids', 'rating', + 'significance', + 'source_id', 'status', - 'molecular_profile_id', + 'therapy_ids', + 'therapy_interaction_type', 'variant_origin', - 'assertion_ids', }) _COMPLEX_FIELDS = CivicRecord._COMPLEX_FIELDS.union({ 'assertions', - 'disease', - 'therapies', - # 'errors', - # 'fields_with_pending_changes', - #'lifecycle_actions', 'phenotypes', - 'source'}) + 'therapies', + }) def __init__(self, **kwargs): self._assertions = [] + self._therapies = [] + self._phenotypes = [] super().__init__(**kwargs) @property def molecular_profile(self): + """ + The :class:`MolecularProfile` object this evidence item belongs to. 
+ """ return get_molecular_profile_by_id(self.molecular_profile_id) + @property + def source(self): + """ + A :class:`Source` object from which this evidence was derived. + """ + return get_source_by_id(self.source_id) + @property def assertions(self): + """ + CIViC :class:`Assertion` records containing this evidence. + """ return [a for a in self._assertions if a.status in self._include_status] @assertions.setter def assertions(self, value): self._assertions = value + @property + def disease(self): + """ + The :class:`Disease` record of the cancer or cancer subtype context for the evidence record. **None** for functional evidence_type. + """ + if self.disease_id is not None: + return get_disease_by_id(self.disease_id) + else: + return None + + + @property + def therapies(self): + """ + Zero or more :class:`Therapy` records, linked to corresponding NCIt terms when applicable. Only used with therapeutic response predictive evidence_type. + """ + return self._therapies + + @therapies.setter + def therapies(self, value): + self._therapies = value + + @property + def phenotypes(self): + """ + Zero or more :class:`Phenotype` records, linked to corresponding `Human Phenotype Ontology (HPO)`_ terms when applicable. + + .. _Human Phenotype Ontology (HPO): https://hpo.jax.org/ + """ + return self._phenotypes + + @phenotypes.setter + def phenotypes(self, value): + self._phenotypes = value + @property def statement(self): + """ + A shorthand for the evidence ``description``. 
+ """ return self.description - @statement.setter - def statement(self, value): - self.description = value + #@statement.setter + #def statement(self, value): + # self.description = value class Assertion(CivicRecord): _SIMPLE_FIELDS = CivicRecord._SIMPLE_FIELDS.union({ 'amp_level', - 'significance', - 'description', - 'therapy_interaction_type', 'assertion_direction', - # 'evidence_item_count', 'assertion_type', + 'description', + 'disease_id', + 'evidence_ids', 'fda_companion_test', 'fda_regulatory_approval', + 'molecular_profile_id', 'name', 'nccn_guideline', 'nccn_guideline_version', - # 'open_change_count', - # 'pending_evidence_count', + 'phenotype_ids', + 'significance', 'status', 'summary', + 'therapy_ids', + 'therapy_interaction_type', 'variant_origin', - 'molecular_profile_id', - 'evidence_ids', }) _COMPLEX_FIELDS = CivicRecord._COMPLEX_FIELDS.union({ 'acmg_codes', 'clingen_codes', - 'disease', 'therapies', 'evidence_items', - #'lifecycle_actions', 'phenotypes', }) def __init__(self, **kwargs): self._evidence_items = [] + self._therapies = [] + self._phenotypes = [] super().__init__(**kwargs) @property def evidence(self): + """ + A shortcut for the :attr:`evidence_items` property. + """ return self.evidence_items @property def evidence_items(self): + """ + A list of :class:`Evidence` records supporting this assertion. + """ return [e for e in self._evidence_items if e.status in self._include_status] @evidence_items.setter def evidence_items(self, value): self._evidence_items = value + @property + def disease(self): + """ + The :class:`Disease` record of the cancer or cancer subtype context for the assertion, linked to a corresponding `Disease Ontology`_ term when applicable. + + .. _Disease Ontology: http://disease-ontology.org/ + """ + return get_disease_by_id(self.disease_id) + + @property + def therapies(self): + """ + Zero or more :class:`Therapy` records, linked to corresponding `NCIt`_ terms when applicable. 
Only used with therapeutic response predictive evidence_type. + + .. _NCIt: https://ncit.nci.nih.gov/ncitbrowser/ + """ + return self._therapies + + @therapies.setter + def therapies(self, value): + self._therapies = value + + @property + def phenotypes(self): + """ + Zero or more :class:`Phenotype` records associated with the assertion, linked to corresponding `Human Phenotype Ontology (HPO)`_ terms. + + .. _Human Phenotype Ontology (HPO): https://hpo.jax.org/ + """ + return self._phenotypes + + @phenotypes.setter + def phenotypes(self, value): + self._phenotypes = value + @property def molecular_profile(self): + """ + The :class:`MolecularProfile` object this assertion belongs to. + """ return get_molecular_profile_by_id(self.molecular_profile_id) @property def hpo_ids(self): + """ + A list of `HPO`_ IDs of the :attr:`phenotypes` associated with this assertion + + .. _HPO: https://hpo.jax.org/ + """ return [x.hpo_id for x in self.phenotypes if x.hpo_id] def format_nccn_guideline(self): @@ -981,7 +1394,6 @@ def format_nccn_guideline(self): class User(CivicRecord): - _SIMPLE_FIELDS = CivicRecord._SIMPLE_FIELDS.union({ 'name', 'username', @@ -1037,57 +1449,71 @@ class Organization(CivicRecord): }) -class CivicAttribute(CivicRecord, dict): - - _SIMPLE_FIELDS = {'type'} - _COMPLEX_FIELDS = set() - - def __repr__(self): - try: - _id = self.id - except AttributeError: - return ''.format(self.type) - else: - return ''.format(self.type, self.id) +class Therapy(CivicRecord): + _SIMPLE_FIELDS = CivicRecord._SIMPLE_FIELDS.union({ + 'ncit_id', + 'therapy_url', + 'name' + }) + _COMPLEX_FIELDS = CivicRecord._COMPLEX_FIELDS.union({ + 'aliases' + }) def __init__(self, **kwargs): - kwargs['partial'] = False - for k, v in kwargs.items(): - self.__setattr__(k, v) + self._evidence_items = [] + self._assertions = [] super().__init__(**kwargs) - def __hash__(self): - try: - _id = self.id - except AttributeError: - raise NotImplementedError - if _id is not None: - return 
CivicRecord.__hash__(self) + def __str__(self): + if self.ncit_id is None: + return self.name else: - raise ValueError + return "{} (NCIt ID {})".format(self.name, self.ncit_id) @property - def site_link(self): - return None + def evidence(self): + """ + A shortcut for the :attr:`evidence_items` property. + """ + return self.evidence_items - def update(self): - return NotImplementedError + @property + def evidence_items(self): + """ + A list of :class:`Evidence` records linked to this therapy. + """ + return [e for e in self._evidence_items if e.status in self._include_status] + @evidence_items.setter + def evidence_items(self, value): + self._evidence_items = value -class Therapy(CivicAttribute): - _SIMPLE_FIELDS = CivicAttribute._SIMPLE_FIELDS.union({'ncit_id', 'therapy_url', 'name'}) - _COMPLEX_FIELDS = CivicAttribute._COMPLEX_FIELDS.union({'aliases'}) + @property + def assertions(self): + """ + A list of :class:`Assertion` records linked to this therapy. + """ + return [a for a in self._assertions if a.status in self._include_status] + + @assertions.setter + def assertions(self, value): + self._assertions = value - def __str__(self): - if self.ncit_id is None: - return self.name - else: - return "{} (NCIt ID {})".format(self.name, self.ncit_id) +class Disease(CivicRecord): + _SIMPLE_FIELDS = CivicRecord._SIMPLE_FIELDS.union({ + 'name', + 'doid', + 'disease_url' + }) + _COMPLEX_FIELDS = CivicRecord._COMPLEX_FIELDS.union({ + 'aliases' + }) -class Disease(CivicAttribute): - _SIMPLE_FIELDS = CivicAttribute._SIMPLE_FIELDS.union({'display_name', 'doid', 'disease_url'}) - _COMPLEX_FIELDS = CivicAttribute._COMPLEX_FIELDS.union({'aliases'}) + def __init__(self, **kwargs): + self._evidence_items = [] + self._assertions = [] + super().__init__(**kwargs) def __str__(self): if self.doid is None: @@ -1095,16 +1521,83 @@ def __str__(self): else: return "{} (DOID {})".format(self.name, self.doid) + @property + def evidence(self): + """ + A shortcut for the 
:attr:`evidence_items` property. + """ + return self.evidence_items + + @property + def evidence_items(self): + """ + A list of :class:`Evidence` records linked to this disease. + """ + return [e for e in self._evidence_items if e.status in self._include_status] + + @evidence_items.setter + def evidence_items(self, value): + self._evidence_items = value + + @property + def assertions(self): + """ + A list of :class:`Assertion` records linked to this disease. + """ + return [a for a in self._assertions if a.status in self._include_status] + + @assertions.setter + def assertions(self, value): + self._assertions = value + + +class Phenotype(CivicRecord): + _SIMPLE_FIELDS = CivicRecord._SIMPLE_FIELDS.union({ + 'hpo_id', + 'phenotype_url', + 'name' + }) -class Phenotype(CivicAttribute): - _SIMPLE_FIELDS = CivicAttribute._SIMPLE_FIELDS.union({'hpo_id', 'url', 'name'}) + def __init__(self, **kwargs): + self._evidence_items = [] + self._assertions = [] + super().__init__(**kwargs) def __str__(self): return "{} (HPO ID {})".format(self.name, self.hpo_id) + @property + def evidence(self): + """ + A shortcut for the :attr:`evidence_items` property. + """ + return self.evidence_items + + @property + def evidence_items(self): + """ + A list of :class:`Evidence` records linked to this phenotype. + """ + return [e for e in self._evidence_items if e.status in self._include_status] -class Source(CivicAttribute): - _SIMPLE_FIELDS = CivicAttribute._SIMPLE_FIELDS.union({ + @evidence_items.setter + def evidence_items(self, value): + self._evidence_items = value + + @property + def assertions(self): + """ + A list of :class:`Assertion` records linked to this phenotype. 
+ """ + return [a for a in self._assertions if a.status in self._include_status] + + @assertions.setter + def assertions(self, value): + self._assertions = value + + +class Source(CivicRecord): + _SIMPLE_FIELDS = CivicRecord._SIMPLE_FIELDS.union({ 'citation', 'citation_id', 'source_type', @@ -1118,66 +1611,195 @@ class Source(CivicAttribute): 'source_url', 'title' }) - _COMPLEX_FIELDS = CivicAttribute._COMPLEX_FIELDS.union({'clinical_trials'}) + _COMPLEX_FIELDS = CivicRecord._COMPLEX_FIELDS.union({ + 'clinical_trials' + }) + + def __init__(self, **kwargs): + self._evidence_items = [] + self._genes = [] + self._factors = [] + self._fusions = [] + self._molecular_profiles = [] + super().__init__(**kwargs) def __str__(self): return "{} ({} {})".format(self.citation, self.source_type, self.citation_id) + @property + def evidence(self): + """ + A shortcut for the :attr:`evidence_items` property. + """ + return self.evidence_items -class Country(CivicAttribute): - _SIMPLE_FIELDS = CivicRecord._SIMPLE_FIELDS.union({'iso', 'name'}) - - -class LifecycleAction(CivicAttribute): - _OPTIONAL_FIELDS = CivicAttribute._OPTIONAL_FIELDS.union({ - 'submitted', - 'last_modified', - 'last_reviewed', - 'accepted' - }) - _COMPLEX_FIELDS = CivicAttribute._COMPLEX_FIELDS.union(_OPTIONAL_FIELDS) + @property + def evidence_items(self): + """ + A list of :class:`Evidence` records linked to this source. + """ + return [e for e in self._evidence_items if e.status in self._include_status] + @evidence_items.setter + def evidence_items(self, value): + self._evidence_items = value -class BaseLifecycleAction(CivicAttribute): - _SIMPLE_FIELDS = CivicAttribute._SIMPLE_FIELDS.union({ - 'timestamp' - }) - _COMPLEX_FIELDS = CivicAttribute._COMPLEX_FIELDS.union({ - 'user' - }) + @property + def genes(self): + """ + A list of :class:`Gene` records supported by this source. 
+ """ + return self._genes - def __init__(self, **kwargs): - self._timestamp = None - super().__init__(**kwargs) + @genes.setter + def genes(self, value): + self._genes = value @property - def timestamp(self): - assert self._timestamp[-1] == 'Z' - return datetime.fromisoformat(self._timestamp[:-1]) + def fusions(self): + """ + A list of :class:`Fusion` records supported by this source. + """ + return self._fusions - @timestamp.setter - def timestamp(self, value): - self._timestamp = value + @fusions.setter + def fusions(self, value): + self._fusions = value + @property + def factors(self): + """ + A list of :class:`Factor` records supported by this source. + """ + return self._factors -class Submitted(BaseLifecycleAction): - pass + @factors.setter + def factors(self, value): + self._factors = value + @property + def molecular_profiles(self): + """ + A list of :class:`MolecularProfile` records supported by this source. + """ + return self._molecular_profiles -class LastModified(BaseLifecycleAction): - pass + @molecular_profiles.setter + def molecular_profiles(self, value): + self._molecular_profiles = value -class LastReviewed(BaseLifecycleAction): - pass +class CivicAttribute(CivicRecord, dict): + _SIMPLE_FIELDS = {'type'} + _COMPLEX_FIELDS = set() -class Accepted(BaseLifecycleAction): - pass + def __repr__(self): + try: + _id = self.id + except AttributeError: + return ''.format(self.type) + else: + return ''.format(self.type, self.id) + def __init__(self, **kwargs): + kwargs['partial'] = False + for k, v in kwargs.items(): + self.__setattr__(k, v) + super().__init__(**kwargs) -def get_cached(element_type, element_id): - klass = get_class(element_type) + def __hash__(self): + try: + _id = self.id + except AttributeError: + raise NotImplementedError + if _id is not None: + return CivicRecord.__hash__(self) + else: + raise ValueError + + @property + def site_link(self): + return None + + def update(self): + return NotImplementedError + + +class 
Coordinates(CivicAttribute): + _SIMPLE_FIELDS = CivicAttribute._SIMPLE_FIELDS.union({ + 'chromosome', + 'start', + 'stop', + 'reference_bases', + 'variant_bases', + 'ensembl_version', + 'representative_transcript', + 'reference_build', + }) + + def __init__(self, **kwargs): + if self.reference_bases in ['', '-']: + self.reference_bases = None + if self.variant_bases in ['', '-']: + self.variant_bases = None + super().__init__(**kwargs) + + +class Country(CivicAttribute): + _SIMPLE_FIELDS = CivicRecord._SIMPLE_FIELDS.union({'iso', 'name'}) + + +class LifecycleAction(CivicAttribute): + _OPTIONAL_FIELDS = CivicAttribute._OPTIONAL_FIELDS.union({ + 'submitted', + 'last_modified', + 'last_reviewed', + 'accepted' + }) + _COMPLEX_FIELDS = CivicAttribute._COMPLEX_FIELDS.union(_OPTIONAL_FIELDS) + + +class BaseLifecycleAction(CivicAttribute): + _SIMPLE_FIELDS = CivicAttribute._SIMPLE_FIELDS.union({ + 'timestamp' + }) + _COMPLEX_FIELDS = CivicAttribute._COMPLEX_FIELDS.union({ + 'user' + }) + + def __init__(self, **kwargs): + self._timestamp = None + super().__init__(**kwargs) + + @property + def timestamp(self): + assert self._timestamp[-1] == 'Z' + return datetime.fromisoformat(self._timestamp[:-1]) + + @timestamp.setter + def timestamp(self, value): + self._timestamp = value + + +class Submitted(BaseLifecycleAction): + pass + + +class LastModified(BaseLifecycleAction): + pass + + +class LastReviewed(BaseLifecycleAction): + pass + + +class Accepted(BaseLifecycleAction): + pass + + +def get_cached(element_type, element_id): + klass = get_class(element_type) r = klass(type=element_type, id=element_id, partial=True) return CACHE.get(hash(r), False) @@ -1196,27 +1818,33 @@ def _get_elements_by_ids(element, id_list=[], allow_cached=True, get_all=False): if not get_all: cached = [get_cached(element, element_id) for element_id in id_list] if all(cached): - logging.info('Loading {} from cache'.format(pluralize(element))) + logging.info('Loading {} from 
cache'.format(utils.pluralize(element))) return cached else: - cached = [get_cached(element, element_id) for element_id in CACHE['{}_all_ids'.format(pluralize(element))]] - logging.info('Loading {} from cache'.format(pluralize(element))) + cached = [get_cached(element, element_id) for element_id in CACHE['{}_all_ids'.format(utils.pluralize(element))]] + logging.info('Loading {} from cache'.format(utils.pluralize(element))) return cached if id_list and get_all: raise ValueError('Please pass list of ids or use the get_all flag, not both.') if get_all: - logging.warning('Getting all {}. This may take a couple of minutes...'.format(pluralize(element))) + logging.warning('Getting all {}. This may take a couple of minutes...'.format(utils.pluralize(element))) response_elements = _request_all(element) else: response_elements = _request_by_ids(element, id_list) + elements = [] + ids = [] for e in response_elements: e = _postprocess_response_element(e, element) + if element == 'variant': + cls = get_class(e['subtype']) + else: + cls = get_class(e['type']) + partial_element = cls(**e, partial=True) + ids.append(e['id']) + elements.append(partial_element) - cls = get_class(element) - elements = [cls(**x, partial=True) for x in response_elements] - cache = [x['id'] for x in response_elements] - CACHE['{}_all_ids'.format(pluralize(element))] = cache + CACHE['{}_all_ids'.format(utils.pluralize(element))] = ids return elements @@ -1227,39 +1855,65 @@ def _postprocess_response_element(e, element): if element == 'assertion': e['molecular_profile_id'] = e['molecular_profile']['id'] e['evidence_ids'] = [evidence['id'] for evidence in e['evidenceItems']] + e['disease_id'] = e['disease']['id'] if e['disease'] is not None else None + e['therapy_ids'] = [t['id'] for t in e['therapies']] + e['phenotype_ids'] = [p['id'] for p in e['phenotypes']] e['status'] = e['status'].lower() + del e['therapies'] elif element == 'evidence': + e['source_id'] = e['source']['id'] e['molecular_profile_id'] 
= e['molecular_profile']['id'] e['assertion_ids'] = [a['id'] for a in e['assertions']] + e['disease_id'] = e['disease']['id'] if e['disease'] is not None else None + e['therapy_ids'] = [t['id'] for t in e['therapies']] + e['phenotype_ids'] = [p['id'] for p in e['phenotypes']] e['status'] = e['status'].lower() + del e['therapies'] + elif element == 'gene': + e['source_ids'] = [v['id'] for v in e['sources']] + del e['sources'] + elif element == 'factor': + e['source_ids'] = [v['id'] for v in e['sources']] + del e['sources'] + elif element == 'fusion': + e['source_ids'] = [v['id'] for v in e['sources']] + del e['sources'] + if e['threePrimeGene']: + e['three_prime_gene_id'] = e['threePrimeGene']['id'] + else: + e['three_prime_gene_id'] = None + if e['fivePrimeGene']: + e['five_prime_gene_id'] = e['fivePrimeGene']['id'] + else: + e['five_prime_gene_id'] = None + elif element == 'molecular_profile': + e['source_ids'] = [s['id'] for s in e['sources']] + del e['sources'] + e['variant_ids'] = [v['id'] for v in e['variants']] + del e['variants'] elif element == 'variant': - e['gene_id'] = e['gene']['id'] - e['entrez_name'] = e['gene']['name'] - #TODO: handle other types of Variants - if e['__typename'] != 'GeneVariant': + e['feature_id'] = e['feature']['id'] + if e['__typename'] == 'GeneVariant': + e['subtype'] = 'gene_variant' + e['entrez_id'] = e['feature']['featureInstance']['entrezId'] + e['entrez_name'] = e['feature']['name'] + build = e['coordinates']['reference_build'] + if build == 'GRCH37': + build = 'GRCh37' + elif build == 'GRCH38': + build = 'GRCh38' + e['coordinates']['reference_build'] = build + elif e['__typename'] == 'FactorVariant': + e['subtype'] = 'factor_variant' + elif e['__typename'] == 'FusionVariant': + e['subtype'] = 'fusion_variant' + else: raise Exception("Variant type {} not supported yet".format(e['__typename'])) - e['entrez_id'] = e['gene']['featureInstance']['entrezId'] - build = e['coordinates']['referenceBuild'] - if build == 'GRCH37': - 
build = 'GRCh37' - elif build == 'GRCH38': - build = 'GRCh38' - e['coordinates'] = { - 'ensembl_version': e['coordinates']['ensemblVersion'], - 'reference_build': build, - 'reference_bases': e['coordinates']['referenceBases'], - 'variant_bases': e['coordinates']['variantBases'], - 'representative_transcript': e['coordinates']['representativeTranscript'], - 'chromosome': e['coordinates']['chromosome'], - 'start': e['coordinates']['start'], - 'stop': e['coordinates']['stop'], - } elif element == 'variant_group': + e['source_ids'] = [v['id'] for v in e['sources']] + del e['sources'] e['variant_ids'] = [v['id'] for v in e['variants']['nodes']] del e['variants'] - elif element == 'molecular_profile': - e['variant_ids'] = [v['id'] for v in e['variants']] - del e['variants'] return e @@ -1269,12 +1923,18 @@ def _get_element_by_id(element, id, allow_cached=True): def _request_by_ids(element, ids): payload_methods = { - 'evidence': _construct_get_evidence_payload, - 'gene': _construct_get_gene_payload, - 'variant': _construct_get_variant_payload, - 'assertion': _construct_get_assertion_payload, - 'variant_group': _construct_get_variant_group_payload, - 'molecular_profile': _construct_get_molecular_profile_payload, + 'evidence': graphql_payloads._construct_get_evidence_payload, + 'gene': graphql_payloads._construct_get_gene_payload, + 'factor': graphql_payloads._construct_get_factor_payload, + 'fusion': graphql_payloads._construct_get_fusion_payload, + 'variant': graphql_payloads._construct_get_variant_payload, + 'assertion': graphql_payloads._construct_get_assertion_payload, + 'variant_group': graphql_payloads._construct_get_variant_group_payload, + 'molecular_profile': graphql_payloads._construct_get_molecular_profile_payload, + 'source': graphql_payloads._construct_get_source_payload, + 'disease': graphql_payloads._construct_get_disease_payload, + 'therapy': graphql_payloads._construct_get_therapy_payload, + 'phenotype': graphql_payloads._construct_get_phenotype_payload, 
} payload_method = payload_methods[element] payload = payload_method() @@ -1290,12 +1950,18 @@ def _request_by_ids(element, ids): def _request_all(element): payload_methods = { - 'evidence': _construct_get_all_evidence_payload, - 'gene': _construct_get_all_genes_payload, - 'variant': _construct_get_all_variants_payload, - 'assertion': _construct_get_all_assertions_payload, - 'variant_group': _construct_get_all_variant_groups_payload, - 'molecular_profile': _construct_get_all_molecular_profiles_payload, + 'evidence': graphql_payloads._construct_get_all_evidence_payload, + 'gene': graphql_payloads._construct_get_all_genes_payload, + 'factor': graphql_payloads._construct_get_all_factors_payload, + 'fusion': graphql_payloads._construct_get_all_fusions_payload, + 'variant': graphql_payloads._construct_get_all_variants_payload, + 'assertion': graphql_payloads._construct_get_all_assertions_payload, + 'variant_group': graphql_payloads._construct_get_all_variant_groups_payload, + 'molecular_profile': graphql_payloads._construct_get_all_molecular_profiles_payload, + 'source': graphql_payloads._construct_get_all_sources_payload, + 'disease': graphql_payloads._construct_get_all_diseases_payload, + 'therapy': graphql_payloads._construct_get_all_therapies_payload, + 'phenotype': graphql_payloads._construct_get_all_phenotypes_payload, } payload_method = payload_methods[element] payload = payload_method() @@ -1304,7 +1970,7 @@ def _request_all(element): variables = { "after": after_cursor } resp = requests.post(API_URL, json={'query': payload, 'variables': variables}, timeout=(10,200)) resp.raise_for_status() - response = resp.json()['data'][pluralize(element)] + response = resp.json()['data'][utils.pluralize(element)] response_elements = response['nodes'] has_next_page = response['pageInfo']['hasNextPage'] after_cursor = response['pageInfo']['endCursor'] @@ -1315,670 +1981,24 @@ def _request_all(element): } resp = requests.post(API_URL, json={'query': payload, 'variables': 
variables}, timeout=(10,200)) resp.raise_for_status() - response = resp.json()['data'][pluralize(element)] + response = resp.json()['data'][utils.pluralize(element)] response_elements.extend(response['nodes']) has_next_page = response['pageInfo']['hasNextPage'] after_cursor = response['pageInfo']['endCursor'] return response_elements +######################### +# Get Entities By ID(s) # +######################### - -def _construct_get_gene_payload(): - return """ - query gene($id: Int!) { - gene(id: $id) { - id - name - description - entrez_id: entrezId - aliases: featureAliases - sources { - id - name - title - citation - citation_id: citationId - source_type: sourceType - abstract - asco_abstract_id: ascoAbstractId - author_string: authorString - full_journal_title: fullJournalTitle - journal - pmc_id: pmcId - publication_date: publicationDate - source_url: sourceUrl - clinical_trials: clinicalTrials { - id - name - description - nctId - url - } - } - } - }""" - - -def _construct_get_all_genes_payload(): - return """ - query genes($after: String) { - genes(after: $after) { - totalCount - pageInfo { - hasNextPage - endCursor - } - nodes { - id - name - description - entrez_id: entrezId - aliases: featureAliases - sources { - id - name - title - citation - citation_id: citationId - source_type: sourceType - abstract - asco_abstract_id: ascoAbstractId - author_string: authorString - full_journal_title: fullJournalTitle - journal - pmc_id: pmcId - publication_date: publicationDate - source_url: sourceUrl - clinical_trials: clinicalTrials { - id - name - description - nctId - url - } - } - } - } - }""" - - -def _construct_get_molecular_profile_payload(): - return """ - query molecularProfile($id: Int!) { - molecular_profile: molecularProfile(id: $id) { - id - description - molecular_profile_score: molecularProfileScore - name - variants { - id - } - aliases: molecularProfileAliases - parsed_name: parsedName { - type: __typename - ... 
on MolecularProfileTextSegment { - text - } - ... on Feature { - id - name - } - ... on Variant { - id - name - deprecated - } - } - sources { - id - name - title - citation - citation_id: citationId - source_type: sourceType - abstract - asco_abstract_id: ascoAbstractId - author_string: authorString - full_journal_title: fullJournalTitle - journal - pmc_id: pmcId - publication_date: publicationDate - source_url: sourceUrl - clinical_trials: clinicalTrials { - id - name - description - nctId - url - } - } - } - }""" - - - -def _construct_get_all_molecular_profiles_payload(): - return """ - query molecularProfiles($after: String) { - molecular_profiles: molecularProfiles(after: $after, evidenceStatusFilter: ALL) { - totalCount - pageInfo { - hasNextPage - endCursor - } - nodes { - id - description - molecular_profile_score: molecularProfileScore - name - variants { - id - } - aliases: molecularProfileAliases - parsed_name: parsedName { - type: __typename - ... on MolecularProfileTextSegment { - text - } - ... on Feature { - id - name - } - ... on Variant { - id - name - deprecated - } - } - sources { - id - name - title - citation - citation_id: citationId - source_type: sourceType - abstract - asco_abstract_id: ascoAbstractId - author_string: authorString - full_journal_title: fullJournalTitle - journal - pmc_id: pmcId - publication_date: publicationDate - source_url: sourceUrl - clinical_trials: clinicalTrials { - id - name - description - nctId - url - } - } - } - } - }""" - -def _construct_get_variant_payload(): - return """ - query variant($id: Int!) { - variant(id: $id) { - __typename - id - name - ... on GeneVariant { - allele_registry_id: alleleRegistryId - clinvar_entries: clinvarIds - hgvs_expressions: hgvsDescriptions - coordinates { - referenceBuild - ensemblVersion - chromosome - representativeTranscript - start - stop - referenceBases - variantBases - coordinateType - } - } - gene: feature { - id - name - featureInstance { - ... 
on Gene { - entrezId - } - } - } - single_variant_molecular_profile_id: singleVariantMolecularProfileId - variant_aliases: variantAliases - variant_types: variantTypes { - id - name - so_id: soid - description - url - } - } - }""" - - -def _construct_get_all_variants_payload(): - return """ - query variants($after: String) { - variants(after: $after, category: GENE) { - totalCount - pageInfo { - hasNextPage - endCursor - } - nodes { - __typename - id - name - ... on GeneVariant { - allele_registry_id: alleleRegistryId - clinvar_entries: clinvarIds - hgvs_expressions: hgvsDescriptions - coordinates { - referenceBuild - ensemblVersion - chromosome - representativeTranscript - start - stop - referenceBases - variantBases - coordinateType - } - } - gene: feature { - id - name - featureInstance { - ... on Gene { - entrezId - } - } - } - single_variant_molecular_profile_id: singleVariantMolecularProfileId - variant_aliases: variantAliases - variant_types: variantTypes { - id - name - so_id: soid - description - url - } - } - } - }""" - -def _construct_get_evidence_payload(): - return """ - query evidenceItem($id: Int!) 
{ - evidence: evidenceItem(id: $id) { - id - name - significance - description - therapy_interaction_type: therapyInteractionType - evidence_direction: evidenceDirection - evidence_level: evidenceLevel - evidence_type: evidenceType - status - variant_origin: variantOrigin - molecular_profile: molecularProfile { - id - } - disease { - id - name - display_name: displayName - doid - disease_url: diseaseUrl - aliases: diseaseAliases - } - therapies { - id - name - ncit_id: ncitId - therapy_url: therapyUrl - aliases: therapyAliases - } - phenotypes { - id - name - hpo_id: hpoId - url - } - assertions { - id - } - source { - id - name - title - citation - citation_id: citationId - source_type: sourceType - abstract - asco_abstract_id: ascoAbstractId - author_string: authorString - full_journal_title: fullJournalTitle - journal - pmc_id: pmcId - publication_date: publicationDate - source_url: sourceUrl - clinical_trials: clinicalTrials { - id - name - description - nctId - url - } - } - rating: evidenceRating - } - }""" - - - -def _construct_get_all_evidence_payload(): - return """ - query evidenceItems($after: String) { - evidence_items: evidenceItems(after: $after, status: ALL) { - totalCount - pageInfo { - hasNextPage - endCursor - } - nodes { - id - name - significance - description - therapy_interaction_type: therapyInteractionType - evidence_direction: evidenceDirection - evidence_level: evidenceLevel - evidence_type: evidenceType - status - variant_origin: variantOrigin - molecular_profile: molecularProfile { - id - } - disease { - id - name - display_name: displayName - doid - disease_url: diseaseUrl - aliases: diseaseAliases - } - therapies { - id - name - ncit_id: ncitId - therapy_url: therapyUrl - aliases: therapyAliases - } - phenotypes { - id - name - hpo_id: hpoId - url - } - assertions { - id - } - source { - id - name - title - citation - citation_id: citationId - source_type: sourceType - abstract - asco_abstract_id: ascoAbstractId - author_string: 
authorString - full_journal_title: fullJournalTitle - journal - pmc_id: pmcId - publication_date: publicationDate - source_url: sourceUrl - clinical_trials: clinicalTrials { - id - name - description - nctId - url - } - } - rating: evidenceRating - } - } - }""" - - - -def _construct_get_assertion_payload(): - return """ - query assertion($id: Int!) { - assertion(id: $id) { - id - name - amp_level: ampLevel - significance - description - therapy_interaction_type: therapyInteractionType - assertion_direction: assertionDirection - assertion_type: assertionType - fda_companion_test: fdaCompanionTest - fda_regulatory_approval: regulatoryApproval - name - nccn_guideline: nccnGuideline { - name - } - nccn_guideline_version: nccnGuidelineVersion - status - summary - variant_origin: variantOrigin - molecular_profile: molecularProfile { - id - } - acmg_codes: acmgCodes { - id - code - description - } - clingen_codes: clingenCodes { - id - code - description - } - disease { - id - name - display_name: displayName - doid - disease_url: diseaseUrl - aliases: diseaseAliases - } - therapies { - id - name - ncit_id: ncitId - therapy_url: therapyUrl - aliases: therapyAliases - } - evidenceItems { - id - } - phenotypes { - id - name - hpo_id: hpoId - url - } - } - }""" - - -def _construct_get_all_assertions_payload(): - return """ - query assertions($after: String) { - assertions(after: $after, status: ALL) { - totalCount - pageInfo { - hasNextPage - endCursor - } - nodes { - id - name - amp_level: ampLevel - significance - description - therapy_interaction_type: therapyInteractionType - assertion_direction: assertionDirection - assertion_type: assertionType - fda_companion_test: fdaCompanionTest - fda_regulatory_approval: regulatoryApproval - name - nccn_guideline: nccnGuideline { - name - } - nccn_guideline_version: nccnGuidelineVersion - status - summary - variant_origin: variantOrigin - molecular_profile: molecularProfile { - id - } - acmg_codes: acmgCodes { - id - code - 
description - } - clingen_codes: clingenCodes { - id - code - description - } - disease { - id - name - display_name: displayName - doid - disease_url: diseaseUrl - aliases: diseaseAliases - } - therapies { - id - name - ncit_id: ncitId - therapy_url: therapyUrl - aliases: therapyAliases - } - evidenceItems { - id - } - phenotypes { - id - name - hpo_id: hpoId - url - } - } - } - }""" - - -def _construct_get_variant_group_payload(): - return """ - query variantGroup($id: Int!) { - variant_group: variantGroup(id: $id) { - id - name - description - variants(first: 100) { - nodes { - id - } - } - sources { - id - name - title - citation - citation_id: citationId - source_type: sourceType - abstract - asco_abstract_id: ascoAbstractId - author_string: authorString - full_journal_title: fullJournalTitle - journal - pmc_id: pmcId - publication_date: publicationDate - source_url: sourceUrl - clinical_trials: clinicalTrials { - id - name - description - nctId - url - } - } - } - }""" - -def _construct_get_all_variant_groups_payload(): - return """ - query variantGroups($after: String) { - variant_groups: variantGroups(after: $after) { - totalCount - pageInfo { - hasNextPage - endCursor - } - nodes { - id - name - description - variants(first: 100) { - nodes { - id - } - } - sources { - id - name - title - citation - citation_id: citationId - source_type: sourceType - abstract - asco_abstract_id: ascoAbstractId - author_string: authorString - full_journal_title: fullJournalTitle - journal - pmc_id: pmcId - publication_date: publicationDate - source_url: sourceUrl - clinical_trials: clinicalTrials { - id - name - description - nctId - url - } - } - } - } - }""" - +# Evidence def get_evidence_by_ids(evidence_id_list): + """ + :param list evidence_id_list: A list of CIViC evidence item IDs to query against the cache and (as needed) CIViC. + :returns: A list of :class:`Evidence` objects.
+ """ logging.info('Getting evidence...') evidence = _get_elements_by_ids('evidence', evidence_id_list) logging.info('Caching evidence details...') @@ -1992,14 +2012,20 @@ def get_evidence_by_ids(evidence_id_list): def get_evidence_by_id(evidence_id): + """ + :param int evidence_id: A single CIViC evidence item ID. + :returns: A :class:`Evidence` object. + """ return get_evidence_by_ids([evidence_id])[0] -def get_molecular_profile_by_id(mp_id): - return get_molecular_profiles_by_ids([mp_id])[0] - +# Molecular Profile def get_molecular_profiles_by_ids(mp_id_list): + """ + :param list mp_id_list: A list of CIViC molecular profile IDs to query against the cache and (as needed) CIViC. + :returns: A list of :class:`MolecularProfile` objects. + """ logging.info('Getting molecular profiles...') mps = _get_elements_by_ids('molecular_profile', mp_id_list) for mp in mps: @@ -2008,9 +2034,23 @@ def get_molecular_profiles_by_ids(mp_id_list): return mps -def get_assertions_by_ids(assertion_id_list=[], get_all=False): +def get_molecular_profile_by_id(mp_id): + """ + :param int mp_id: A single CIViC molecular profile ID. + :returns: A :class:`MolecularProfile` object. + """ + return get_molecular_profiles_by_ids([mp_id])[0] + + +# Assertion + +def get_assertions_by_ids(assertion_id_list): + """ + :param list assertion_id_list: A list of CIViC assertion IDs to query against the cache and (as needed) CIViC. + :returns: A list of :class:`Assertion` objects. + """ logging.info('Getting assertions...') - assertions = _get_elements_by_ids('assertion', assertion_id_list, get_all=get_all) + assertions = _get_elements_by_ids('assertion', assertion_id_list) for a in assertions: a._include_status = ['accepted', 'submitted', 'rejected'] logging.info('Caching variant details...') @@ -2022,151 +2062,668 @@ def get_assertions_by_ids(assertion_id_list=[], get_all=False): def get_assertion_by_id(assertion_id): + """ + :param int assertion_id: A single CIViC assertion ID.
+ :returns: A :class:`Assertion` object. + """ return get_assertions_by_ids([assertion_id])[0] -def get_all_assertions(include_status=['accepted','submitted','rejected'], allow_cached=True): - assertions = _get_elements_by_ids('assertion', allow_cached=allow_cached, get_all=True) - return [a for a in assertions if a.status in include_status] - - -def search_assertions_by_coordinates(coordinates, search_mode='any'): - variants = search_variants_by_coordinates(coordinates, search_mode=search_mode) - assertions = set() - for v in variants: - if v.single_variant_molecular_profile.assertions: - assertions.update(v.single_variant_molecular_profile.assertions) - return list(assertions) - +# Variant def get_variants_by_ids(variant_id_list): + """ + :param list variant_id_list: A list of CIViC variant IDs to query against the cache and (as needed) CIViC. + :returns: A list of :class:`Variant` objects. + """ logging.info('Getting variants...') variants = _get_elements_by_ids('variant', variant_id_list) gene_ids = set() + factor_ids = set() + fusion_ids = set() for variant in variants: - gene_ids.add(variant.gene_id) + if isinstance(variant, GeneVariant): + gene_ids.add(variant.feature_id) + elif isinstance(variant, FactorVariant): + factor_ids.add(variant.feature_id) + elif isinstance(variant, FusionVariant): + fusion_ids.add(variant.feature_id) variant._include_status = ['accepted', 'submitted', 'rejected'] if gene_ids: logging.info('Caching gene details...') _get_elements_by_ids('gene', gene_ids) + if factor_ids: + logging.info('Caching factor details...') + _get_elements_by_ids('factor', factor_ids) + if fusion_ids: + logging.info('Caching fusion details...') + _get_elements_by_ids('fusion', fusion_ids) return variants def get_variant_by_id(variant_id): + """ + :param int variant_id: A single CIViC variant ID. + :returns: A :class:`Variant` object.
+ """ return get_variants_by_ids([variant_id])[0] -def get_variant_groups_by_ids(variant_group_id_list): - logging.info('Getting variant groups...') - vgs = _get_elements_by_ids('variant_group', variant_group_id_list) - for vg in vgs: - vg._include_status = ['accepted', 'submitted', 'rejected'] - return vgs +# Variant Group + +def get_variant_groups_by_ids(variant_group_id_list): + """ + :param list variant_group_id_list: A list of CIViC variant group IDs to query against the cache and (as needed) CIViC. + :returns: A list of :class:`VariantGroup` objects. + """ + logging.info('Getting variant groups...') + vgs = _get_elements_by_ids('variant_group', variant_group_id_list) + for vg in vgs: + vg._include_status = ['accepted', 'submitted', 'rejected'] + return vgs + + +def get_variant_group_by_id(variant_group_id): + """ + :param int variant_group_id: A single CIViC variant group ID. + :returns: A :class:`VariantGroup` object. + """ + return get_variant_groups_by_ids([variant_group_id])[0] + + +# Feature + +def get_features_by_ids(feature_id_list): + """ + :param list feature_id_list: A list of CIViC feature IDs to query against the cache and (as needed) CIViC. + :returns: A list of :class:`Gene`, :class:`Fusion`, and/or :class:`Factor` objects.
+ """ + logging.info('Getting features...') + features = [] + for feature_id in feature_id_list: + feature = None + try: + feature = _get_element_by_id('gene', feature_id) + except: + pass + try: + feature = _get_element_by_id('fusion', feature_id) + except: + pass + try: + feature = _get_element_by_id('factor', feature_id) + except: + pass + if feature is None: + raise Exception("Feature {} not found".format(feature_id)) + else: + features.append(feature) + variant_ids = set() + for feature in features: + feature._include_status = ['accepted', 'submitted', 'rejected'] + for variant in feature.variants: + variant_ids.add(variant.id) + if variant_ids: + logging.info('Caching variant details...') + _get_elements_by_ids('variant', variant_ids) + for feature in features: + for variant in feature.variants: + variant.update() + return features + + +def get_feature_by_id(feature_id): + """ + :param int feature_id: A single CIViC feature ID. + :returns: A :class:`Gene`, :class:`Fusion`, or :class:`Factor` object. + """ + return get_features_by_ids([feature_id])[0] + + +def get_genes_by_ids(gene_id_list): + """ + :param list gene_id_list: A list of CIViC gene feature IDs to query against the cache and (as needed) CIViC. + :returns: A list of :class:`Gene` objects. + """ + logging.info('Getting genes...') + genes = _get_elements_by_ids('gene', gene_id_list) + variant_ids = set() + for gene in genes: + gene._include_status = ['accepted', 'submitted', 'rejected'] + for variant in gene.variants: + variant_ids.add(variant.id) + if variant_ids: + logging.info('Caching variant details...') + _get_elements_by_ids('variant', variant_ids) + for gene in genes: + for variant in gene.variants: + variant.update() + return genes + + +def get_gene_by_id(gene_id): + """ + :param int gene_id: A single CIViC gene feature ID. + :returns: A :class:`Gene` object.
+ """ + return get_genes_by_ids([gene_id])[0] + + +def get_fusions_by_ids(fusion_id_list): + """ + :param list fusion_id_list: A list of CIViC fusion feature IDs to query against to cache and (as needed) CIViC. + :returns: A list of :class:`Fusion` objects. + """ + logging.info('Getting fusions...') + fusions = _get_elements_by_ids('fusion', fusion_id_list) + variant_ids = set() + for fusion in fusions: + fusion._include_status = ['accepted', 'submitted', 'rejected'] + for variant in fusion.variants: + variant_ids.add(variant.id) + if variant_ids: + logging.info('Caching variant details...') + _get_elements_by_ids('variant', variant_ids) + for fusion in fusions: + for variant in fusion.variants: + variant.update() + return fusions + + +def get_fusion_by_id(fusion_id): + """ + :param int fusion_id: A single CIViC fusion feature ID. + :returns: A :class:`Fusion` object. + """ + return get_fusions_by_ids([fusion_id])[0] + + +def get_factors_by_ids(factor_id_list): + """ + :param list factor_id_list: A list of CIViC factor feature IDs to query against to cache and (as needed) CIViC. + :returns: A list of :class:`Factor` objects. + """ + logging.info('Getting factors...') + factors = _get_elements_by_ids('factor', factor_id_list) + variant_ids = set() + for factor in factors: + factor._include_status = ['accepted', 'submitted', 'rejected'] + for variant in factor.variants: + variant_ids.add(variant.id) + if variant_ids: + logging.info('Caching variant details...') + _get_elements_by_ids('variant', variant_ids) + for factor in factors: + for variant in factor.variants: + variant.update() + return factors + + +def get_factor_by_id(factor_id): + """ + :param int factor_id: A single CIViC factor feature ID. + :returns: A :class:`Factor` object. + """ + return get_factors_by_ids([factor_id])[0] + + +# Source + +def get_sources_by_ids(source_id_list): + """ + :param list source_id_list: A list of CIViC source IDs to query against to cache and (as needed) CIViC. 
+ :returns: A list of :class:`Source` objects. + """ + logging.info('Getting sources...') + sources = _get_elements_by_ids('source', source_id_list) + return sources + + +def get_source_by_id(source_id): + """ + :param int source_id: A single CIViC source ID. + :returns: A :class:`Source` object. + """ + return get_sources_by_ids([source_id])[0] + + +# Disease + +def get_diseases_by_ids(disease_id_list): + """ + :param list disease_id_list: A list of CIViC disease IDs to query against to cache and (as needed) CIViC. + :returns: A list of :class:`Disease` objects. + """ + logging.info('Getting diseases...') + diseases = _get_elements_by_ids('disease', disease_id_list) + return diseases + + +def get_disease_by_id(disease_id): + """ + :param int disease_id: A single CIViC disease ID. + :returns: A :class:`Disease` object. + """ + return get_diseases_by_ids([disease_id])[0] + + +# Therapy + +def get_therapies_by_ids(therapy_id_list): + """ + :param list therapy_id_list: A list of CIViC therapy IDs to query against to cache and (as needed) CIViC. + :returns: A list of :class:`Therapy` objects. + """ + logging.info('Getting therapies...') + therapies = _get_elements_by_ids('therapy', therapy_id_list) + return therapies + + +def get_therapy_by_id(therapy_id): + """ + :param int therapy_id: A single CIViC therapy ID. + :returns: A :class:`Therapy` object. + """ + return get_therapies_by_ids([therapy_id])[0] + + +# Phenotype + +def get_phenotypes_by_ids(phenotype_id_list): + """ + :param list phenotype_id_list: A list of CIViC phenotype IDs to query against to cache and (as needed) CIViC. + :returns: A list of :class:`Phenotype` objects. + """ + logging.info('Getting phenotypes...') + phenotypes = _get_elements_by_ids('phenotype', phenotype_id_list) + return phenotypes + + +def get_phenotype_by_id(phenotype_id): + """ + :param int phenotype_id: A single CIViC phenotype ID. + :returns: A :class:`Phenotype` object. 
+ """ + return get_phenotypes_by_ids([phenotype_id])[0] + + +########### +# Get All # +########### + +# Assertion + +def get_all_assertions(include_status=['accepted','submitted','rejected'], allow_cached=True): + """ + Queries CIViC for all assertions. + + :param list include_status: A list of statuses. Only assertions matching the given statuses will be returned. + :param bool allow_cached: Indicates whether or not object retrieval from CACHE is allowed. If **False** it will query the CIViC database directly. + :returns: A list of :class:`Assertion` objects. + """ + assertions = _get_elements_by_ids('assertion', allow_cached=allow_cached, get_all=True) + return [a for a in assertions if a.status in include_status] + + +# Molecular Profile + +def get_all_molecular_profiles(include_status=['accepted', 'submitted', 'rejected'], allow_cached=True): + """ + Queries CIViC for all molecular profiles. + + :param list include_status: A list of statuses. Only molecular profiles and their associated entities matching the given statuses will be returned. Use **None** to include molecular profiles without any associated entities. + :param bool allow_cached: Indicates whether or not object retrieval from CACHE is allowed. If **False** it will query the CIViC database directly. + :returns: A list of :class:`MolecularProfile` objects. + """ + mps = _get_elements_by_ids('molecular_profile', allow_cached=allow_cached, get_all=True) + if include_status: + assert CACHE.get('evidence_items_all_ids', False) + resp = list() + for mp in mps: + mp._include_status = include_status + if mp.evidence: + resp.append(mp) + return resp + else: + return mps + + +# Variant + +def get_all_variants(include_status=['accepted', 'submitted', 'rejected'], allow_cached=True): + """ + Queries CIViC for all variants. + + :param list include_status: A list of statuses. Only variants and their associated entities matching the given statuses will be returned. 
Use **None** to include variants without any associated entities. + :param bool allow_cached: Indicates whether or not object retrieval from CACHE is allowed. If **False** it will query the CIViC database directly. + :returns: A list of :class:`Variant` objects. + """ + variants = _get_elements_by_ids('variant', allow_cached=allow_cached, get_all=True) + if include_status: + assert CACHE.get('evidence_items_all_ids', False) + assert CACHE.get('assertions_all_ids', False) + resp = list() + for v in variants: + v._include_status = include_status + if v.molecular_profiles: + resp.append(v) + return resp + else: + return variants + + +def get_all_gene_variants(include_status=['accepted', 'submitted', 'rejected'], allow_cached=True): + """ + Queries CIViC for all gene variants. + + :param list include_status: A list of statuses. Only variants and their associated entities matching the given statuses will be returned. Use **None** to include variants without any associated entities. + :param bool allow_cached: Indicates whether or not object retrieval from CACHE is allowed. If **False** it will query the CIViC database directly. + :returns: A list of :class:`Variant` objects of **subtype** **gene_variant**. + """ + variants = get_all_variants(include_status=include_status, allow_cached=allow_cached) + return [v for v in variants if v.subtype == 'gene_variant'] + + +def get_all_fusion_variants(include_status=['accepted', 'submitted', 'rejected'], allow_cached=True): + """ + Queries CIViC for all fusion variants. + + :param list include_status: A list of statuses. Only variants and their associated entities matching the given statuses will be returned. Use **None** to include variants without any associated entities. + :param bool allow_cached: Indicates whether or not object retrieval from CACHE is allowed. If **False** it will query the CIViC database directly. + :returns: A list of :class:`Variant` objects of **subtype** **fusion_variant**.
+ """ + variants = get_all_variants(include_status=include_status, allow_cached=allow_cached) + return [v for v in variants if v.subtype == 'fusion_variant'] + + +def get_all_factor_variants(include_status=['accepted', 'submitted', 'rejected'], allow_cached=True): + """ + Queries CIViC for all factor variants. + + :param list include_status: A list of statuses. Only variants and their associated entities matching the given statuses will be returned. Use **None** to include variants without any associated entities. + :param bool allow_cached: Indicates whether or not object retrieval from CACHE is allowed. If **False** it will query the CIViC database directly. + :returns: A list of :class:`Variant` objects of **subtype** **factor_variant**. + """ + variants = get_all_variants(include_status=include_status, allow_cached=allow_cached) + return [v for v in variants if v.subtype == 'factor_variant'] + + +# Variant Group + +def get_all_variant_groups(allow_cached=True): + """ + Queries CIViC for all variant groups. + + :param bool allow_cached: Indicates whether or not object retrieval from CACHE is allowed. If **False** it will query the CIViC database directly. + :returns: A list of :class:`VariantGroup` objects. + """ + variant_groups = _get_elements_by_ids('variant_group', allow_cached=allow_cached, get_all=True) + return variant_groups + + +# Feature + +def get_all_features(include_status=['accepted','submitted','rejected'], allow_cached=True): + """ + Queries CIViC for all features. + + :param list include_status: A list of statuses. Only features and their associated entities matching the given statuses will be returned. Use **None** to include features without any associated entities. + :param bool allow_cached: Indicates whether or not object retrieval from CACHE is allowed. If **False** it will query the CIViC database directly. + :returns: A list of :class:`Gene`, :class:`Fusion`, and/or :class:`Factor` objects.
+ """ + genes = _get_elements_by_ids('gene', get_all=True, allow_cached=allow_cached) + fusions = _get_elements_by_ids('fusion', get_all=True, allow_cached=allow_cached) + factors = _get_elements_by_ids('factor', get_all=True, allow_cached=allow_cached) + features = [] + features.extend(genes) + features.extend(fusions) + features.extend(factors) + if include_status: + assert CACHE.get('variants_all_ids', False) + assert CACHE.get('evidence_items_all_ids', False) + resp = list() + for f in features: + f._include_status = include_status + if f.variants: + resp.append(f) + return resp + else: + return features + + + +def get_all_genes(include_status=['accepted','submitted','rejected'], allow_cached=True): + """ + Queries CIViC for all gene features. + + :param list include_status: A list of statuses. Only genes and their associated entities matching the given statuses will be returned. Use **None** to include genes without any associated entities. + :param bool allow_cached: Indicates whether or not object retrieval from CACHE is allowed. If **False** it will query the CIViC database directly. + :returns: A list of :class:`Gene` objects. + """ + genes = _get_elements_by_ids('gene', get_all=True, allow_cached=allow_cached) + if include_status: + assert CACHE.get('variants_all_ids', False) + assert CACHE.get('evidence_items_all_ids', False) + resp = list() + for g in genes: + g._include_status = include_status + if g.variants: + resp.append(g) + return resp + else: + return genes + + +def get_all_fusions(include_status=['accepted','submitted','rejected'], allow_cached=True): + """ + Queries CIViC for all fusion features. + + :param list include_status: A list of statuses. Only fusions and their associated entities matching the given statuses will be returned. Use **None** to include fusions without any associated entities. + :param bool allow_cached: Indicates whether or not object retrieval from CACHE is allowed. If **False** it will query the CIViC database directly. 
+ :returns: A list of :class:`Fusion` objects. + """ + fusions = _get_elements_by_ids('fusion', get_all=True, allow_cached=allow_cached) + if include_status: + assert CACHE.get('variants_all_ids', False) + assert CACHE.get('evidence_items_all_ids', False) + resp = list() + for f in fusions: + f._include_status = include_status + if f.variants: + resp.append(f) + return resp + else: + return fusions + + +def get_all_factors(include_status=['accepted','submitted','rejected'], allow_cached=True): + """ + Queries CIViC for all factor features. + + :param list include_status: A list of statuses. Only factors and their associated entities matching the given statuses will be returned. Use **None** to include factors without any associated entities. + :param bool allow_cached: Indicates whether or not object retrieval from CACHE is allowed. If **False** it will query the CIViC database directly. + :returns: A list of :class:`Factor` objects. + """ + factors = _get_elements_by_ids('factor', get_all=True, allow_cached=allow_cached) + if include_status: + assert CACHE.get('variants_all_ids', False) + assert CACHE.get('evidence_items_all_ids', False) + resp = list() + for f in factors: + f._include_status = include_status + if f.variants: + resp.append(f) + return resp + else: + return factors + + +# Evidence +def get_all_evidence(include_status=['accepted','submitted','rejected'], allow_cached=True): + """ + Queries CIViC for all evidence items. -def get_variant_group_by_id(variant_group_id): - return get_variant_groups_by_ids([variant_group_id])[0] + :param list include_status: A list of statuses. Only evidence items matching the given statuses will be returned. + :param bool allow_cached: Indicates whether or not object retrieval from CACHE is allowed. If **False** it will query the CIViC database directly. + :returns: A list of :class:`EvidenceItem` objects. 
+ """ + evidence = _get_elements_by_ids('evidence', get_all=True, allow_cached=allow_cached) + return [e for e in evidence if e.status in include_status] -def _build_coordinate_table(variants): - variant_records = list() - for v in variants: - c = v.coordinates - start = getattr(c, 'start', None) - stop = getattr(c, 'stop', None) - chr = getattr(c, 'chromosome', None) - alt = getattr(c, 'variant_bases', None) - ref = getattr(c, 'reference_bases', None) - if all([start, stop, chr]): - variant_records.append([chr, start, stop, alt, ref, hash(v)]) - else: - continue - #start = getattr(c, 'start2', None) - #stop = getattr(c, 'stop2', None) - #chr = getattr(c, 'chromosome2', None) - #if all([start, stop, chr]): - # variant_records.append([chr, start, stop, None, None, hash(v)]) - df = pd.DataFrame.from_records( - variant_records, - columns=['chr', 'start', 'stop', 'alt', 'ref', 'v_hash'] - ).sort_values(by=['chr', 'start', 'stop', 'alt', 'ref']) - MODULE.COORDINATE_TABLE = df - MODULE.COORDINATE_TABLE_START = df.start.sort_values() - MODULE.COORDINATE_TABLE_STOP = df.stop.sort_values() - MODULE.COORDINATE_TABLE_CHR = df.chr.sort_values() +# Source +def get_all_sources(include_status=['accepted','submitted','rejected'], allow_cached=True): + """ + Queries CIViC for all sources. -def get_all_molecular_profiles(include_status=['accepted', 'submitted', 'rejected'], allow_cached=True): - mps = _get_elements_by_ids('molecular_profile', allow_cached=allow_cached, get_all=True) + :param list include_status: A list of statuses. Only sources and their associated entities matching the given statuses will be returned. + :param bool allow_cached: Indicates whether or not object retrieval from CACHE is allowed. If **False** it will query the CIViC database directly. + :returns: A list of :class:`Source` objects. 
+ """ + sources = _get_elements_by_ids('source', get_all=True, allow_cached=allow_cached) if include_status: assert CACHE.get('evidence_items_all_ids', False) resp = list() - for mp in mps: - mp._include_status = include_status - if mp.evidence: - resp.append(mp) + for s in sources: + s._include_status = include_status + if s.evidence_items: + resp.append(s) return resp else: - return mps + return sources -def get_all_variants(include_status=['accepted', 'submitted', 'rejected'], allow_cached=True): - variants = _get_elements_by_ids('variant', allow_cached=allow_cached, get_all=True) +# Disease + +def get_all_diseases(include_status=['accepted','submitted','rejected'], allow_cached=True): + """ + Queries CIViC for all diseases. + + :param list include_status: A list of statuses. Only diseases and their associated entities matching the given statuses will be returned. + :param bool allow_cached: Indicates whether or not object retrieval from CACHE is allowed. If **False** it will query the CIViC database directly. + :returns: A list of :class:`Disease` objects. 
+ """ + diseases = _get_elements_by_ids('disease', get_all=True, allow_cached=allow_cached) if include_status: assert CACHE.get('evidence_items_all_ids', False) assert CACHE.get('assertions_all_ids', False) resp = list() - for v in variants: - v._include_status = include_status - if v.molecular_profiles: - resp.append(v) + for d in diseases: + d._include_status = include_status + if d.evidence_items or d.assertions: + resp.append(d) return resp else: - return variants + return diseases -def get_all_variant_groups(allow_cached=True): - variant_groups = _get_elements_by_ids('variant_group', allow_cached=allow_cached, get_all=True) - return variant_groups +# Therapy -def search_variants_by_allele_registry_id(caid): +def get_all_therapies(include_status=['accepted','submitted','rejected'], allow_cached=True): """ - Search the cache for variants matching the queried Allele Registry ID (CAID) + Queries CIViC for all therapies. + + :param list include_status: A list of statuses. Only therapies and their associated entities matching the given statuses will be returned. + :param bool allow_cached: Indicates whether or not object retrieval from CACHE is allowed. If **False** it will query the CIViC database directly. + :returns: A list of :class:`Therapy` objects. 
+ """ + therapies = _get_elements_by_ids('therapy', get_all=True, allow_cached=allow_cached) + if include_status: + assert CACHE.get('evidence_items_all_ids', False) + assert CACHE.get('assertions_all_ids', False) + resp = list() + for t in therapies: + t._include_status = include_status + if t.evidence_items or t.assertions: + resp.append(t) + return resp + else: + return therapies - :param String caid: Allele Registry ID to query - :return: Returns a list of variant hashes matching the Allele Registry ID +# Phenotype + +def get_all_phenotypes(include_status=['accepted','submitted','rejected'], allow_cached=True): """ - return search_variants_by_attribute('allele_registry_id', caid) + Queries CIViC for all phenotypes. -def search_variants_by_name(name): + :param list include_status: A list of statuses. Only phenotypes and their associated entities matching the given statuses will be returned. + :param bool allow_cached: Indicates whether or not object retrieval from CACHE is allowed. If **False** it will query the CIViC database directly. + :returns: A list of :class:`Phenotype` objects. 
""" - Search the cache for variants matching the queried name + phenotypes = _get_elements_by_ids('phenotype', get_all=True, allow_cached=allow_cached) + if include_status: + assert CACHE.get('evidence_items_all_ids', False) + assert CACHE.get('assertions_all_ids', False) + resp = list() + for p in phenotypes: + p._include_status = include_status + if p.evidence_items or p.assertions: + resp.append(p) + return resp + else: + return phenotypes - :param String name: Variant name to query - :return: Returns a list of variant hashes matching the name +######################### +# Search by Coordinates # +######################### + +def search_evidence_by_coordinates(coordinates, search_mode='any'): """ - return search_variants_by_attribute('name', name) + Search the cache for variants matching provided coordinates using the corresponding search mode and return all evidence items linked to any molecular profile involving those variants. -def search_variants_by_hgvs(hgvs): + :param CoordinateQuery coordinate_query: Coordinates to query + + :param any,query_encompassing,variant_encompassing,exact search_mode: + *any* : any overlap between a query and a variant is a match\n + *query_encompassing* : CIViC variant records must fit within the coordinates of the query\n + *record_encompassing* : CIViC variant records must encompass the coordinates of the query\n + *exact* : variants must match coordinates precisely, as well as reference allele(s) and alternate allele(s). + Use ``'*'`` in the coordinate_query as a wildcard for reference and/or alternate alleles. + Using ``None`` in the coordinate_query for reference or alternate alleles will only match + variants that have no reference or alternate alleles, respectively (e.g. 
indels) \n + search_mode is *any* by default + + :return: A list of :class:`EvidenceItem` objects linked to molecular profiles involving variants matching the coordinates and search_mode """ - Search the cache for variants matching the queried HGVS expression + variants = search_variants_by_coordinates(coordinates, search_mode=search_mode) + evidence = set() + for v in variants: + for mp in v.molecular_profiles: + if mp.evidence: + evidence.update(mp.evidence) + return list(evidence) - :param String name: HGVS expression to query - :return: Returns a list of variant hashes matching the HGVS expression +def search_assertions_by_coordinates(coordinates, search_mode='any'): """ - return search_variants_by_list_field('hgvs_expressions', hgvs) + Search the cache for variants matching provided coordinates using the corresponding search mode and return all assertions linked to any molecular profile involving those variants. -def search_variants_by_attribute(attribute, value): - variants = get_all_variants() - return [v for v in variants if getattr(v, attribute) == value] + :param CoordinateQuery coordinate_query: Coordinates to query + + :param any,query_encompassing,variant_encompassing,exact search_mode: + *any* : any overlap between a query and a variant is a match\n + *query_encompassing* : CIViC variant records must fit within the coordinates of the query\n + *record_encompassing* : CIViC variant records must encompass the coordinates of the query\n + *exact* : variants must match coordinates precisely, as well as reference allele(s) and alternate allele(s). + Use ``'*'`` in the coordinate_query as a wildcard for reference and/or alternate alleles. + Using ``None`` in the coordinate_query for reference or alternate alleles will only match + variants that have no reference or alternate alleles, respectively (e.g. 
indels) \n + search_mode is *any* by default + + :return: A list of :class:`Assertion` objects linked to molecular profiles involving variants matching the coordinates and search_mode + """ + variants = search_variants_by_coordinates(coordinates, search_mode=search_mode) + assertions = set() + for v in variants: + for mp in v.molecular_profiles: + if mp.assertions: + assertions.update(mp.assertions) + return list(assertions) -def search_variants_by_list_field(field, value): - variants = get_all_variants() - matched_variants = [] - return [v for v in variants if value in getattr(v, field)] def search_variants_by_coordinates(coordinate_query, search_mode='any'): """ @@ -2469,71 +3026,260 @@ def append_match(matches_list, query, ct_row): return dict(matches) -def get_genes_by_ids(gene_id_list): +################################## +# Search/get by other attributes # +################################## + +# Genes + +def get_gene_by_entrez_id(entrez_id): """ - :param list gene_id_list: A list of CIViC gene IDs to query against to cache and (as needed) CIViC. - :returns: A list of :class:`Gene` objects. + :param str entrez_id: A gene `Entrez ID`_. + :returns: A :class:`Gene` object. + + .. 
_Entrez ID: https://www.ncbi.nlm.nih.gov/gene/ """ - logging.info('Getting genes...') - genes = _get_elements_by_ids('gene', gene_id_list) # Advanced search results are incomplete - variant_ids = set() - for gene in genes: - gene._include_status = ['accepted', 'submitted', 'rejected'] - for variant in gene.variants: - variant_ids.add(variant.id) - if variant_ids: - logging.info('Caching variant details...') - _get_elements_by_ids('variant', variant_ids) - for gene in genes: - for variant in gene.variants: - variant.update() - return genes + genes = _get_elements_by_ids('gene', get_all=True) + matching_genes = [g for g in genes if g.entrez_id == entrez_id] + if len(matching_genes) == 0: + raise Exception("No Gene with Entrez ID: {}".format(entrez_id)) + return matching_genes[0] -def get_gene_by_id(gene_id): +def get_gene_by_name(name): """ - :param int gene_id: A single CIViC gene ID. + :param str name: A `HGNC Gene Symbol`_. :returns: A :class:`Gene` object. + + .. _HGNC Gene Symbol: https://www.genenames.org/ """ - return get_genes_by_ids([gene_id])[0] + genes = _get_elements_by_ids('gene', get_all=True) + matching_genes = [g for g in genes if g.name == name] + if len(matching_genes) == 0: + raise Exception("No Gene with HGNC Gene Symbol: {}".format(name)) + return matching_genes[0] -def get_all_genes(include_status=['accepted','submitted','rejected'], allow_cached=True): +# Factors + +def get_factor_by_ncit_id(ncit_id): """ - Queries CIViC for all genes. + :param str ncit_id: A factor `NCIthesaurus ID`_. + :returns: A :class:`Factor` object. - :param list include_status: A list of statuses. Only genes and their associated entities matching the given statuses will be returned. - :param bool allow_cached: Indicates whether or not object retrieval from CACHE is allowed. If **False** it will query the CIViC database directly. - :returns: A list of :class:`Gene` objects. + ..
_NCIthesaurus ID: https://ncithesaurus.nci.nih.gov/ncitbrowser/ """ - genes = _get_elements_by_ids('gene', get_all=True, allow_cached=allow_cached) - if include_status: - assert CACHE.get('variants_all_ids', False) - assert CACHE.get('evidence_items_all_ids', False) - resp = list() - for g in genes: - g._include_status = include_status - if g.variants: - resp.append(g) - return resp - else: - return genes + factors = _get_elements_by_ids('factor', get_all=True) + matching_factors = [f for f in factors if f.ncit_id == ncit_id] + if len(matching_factors) == 0: + raise Exception("No Factor with NCIt ID: {}".format(ncit_id)) + return matching_factors[0] -def get_all_evidence(include_status=['accepted','submitted','rejected'], allow_cached=True): - evidence = _get_elements_by_ids('evidence', get_all=True, allow_cached=allow_cached) - return [e for e in evidence if e.status in include_status] +def get_factor_by_name(name): + """ + :param str name: A factor name or full name. + :returns: A :class:`Factor` object. + """ + factors = _get_elements_by_ids('factor', get_all=True) + matching_factors = [f for f in factors if f.name == name or f.full_name == name] + if len(matching_factors) == 0: + raise Exception("No Factor with name or full name: {}".format(name)) + return matching_factors[0] + + +# Fusion +def get_fusion_by_name(name): + """ + :param str name: A fusion name. + :returns: A :class:`Fusion` object. + """ + fusions = _get_elements_by_ids('fusion', get_all=True) + matching_fusions = [f for f in fusions if f.name == name] + if len(matching_fusions) == 0: + raise Exception("No Fusion with name: {}".format(name)) + return matching_fusions[0] + + +def search_fusions_by_partner_gene_id(partner_gene_id): + """ + :param int partner_gene_id: A CIViC ID of one of the gene partners. + :returns: A list of :class:`Fusion` object. 
+ """ + fusions = _get_elements_by_ids('fusion', get_all=True) + matching_fusions = [f for f in fusions if f.five_prime_gene_id == partner_gene_id or f.three_prime_gene_id == partner_gene_id] + return matching_fusions -def get_HPO_terms_by_ids(hpo_id_list): - if not HPO_TERMS: - _load_HPO() - return [HPO_TERMS[x] for x in hpo_id_list] +# Variants -def _load_HPO(): - url = 'https://civicdb.org/api/phenotypes?count=100000' - resp = requests.get(url) - resp.raise_for_status() - for h in resp.json(): - HPO_TERMS[h['id']] = h +def search_variants_by_allele_registry_id(caid): + """ + Search the cache for variants matching the queried Allele Registry ID (CAID) + + :param str caid: `Allele Registry ID`_ to query + :return: Returns a list of variant hashes matching the Allele Registry ID + + .. _Allele Registry ID: https://reg.clinicalgenome.org/redmine/projects/registry/genboree_registry/landing + """ + return search_variants_by_attribute('allele_registry_id', caid) + + +def search_variants_by_name(name): + """ + Search the cache for variants matching the queried name + + :param str name: Variant name to query + :return: Returns a list of variant hashes matching the name + """ + return search_variants_by_attribute('name', name) + + +def search_variants_by_hgvs(hgvs): + """ + Search the cache for variants matching the queried HGVS expression + + :param str name: HGVS expression to query + :return: Returns a list of variant hashes matching the HGVS expression + """ + return search_variants_by_list_field('hgvs_expressions', hgvs) + + +def search_variants_by_attribute(attribute, value): + variants = get_all_variants() + return [v for v in variants if hasattr(v, attribute) and getattr(v, attribute) == value] + + +def search_variants_by_list_field(field, value): + variants = get_all_variants() + matched_variants = [] + return [v for v in variants if hasattr(v, field) and value in getattr(v, field)] + + +# Source + +def get_pubmed_source_by_id(pmid): + """ + :param str pmid: A 
PubMed ID. + :returns: A :class:`Source` object. + """ + sources = _get_elements_by_ids('source', get_all=True) + matching_sources = [s for s in sources if s.citation_id == pmid and s.source_type == 'PUBMED'] + if len(matching_sources) == 0: + raise Exception("No PubMed sources with PMID: {}".format(pmid)) + return matching_sources[0] + + +def get_ash_source_by_doi(doi): + """ + :param str doi: An ASH abstract DOI. + :returns: A :class:`Source` object. + """ + sources = _get_elements_by_ids('source', get_all=True) + matching_sources = [s for s in sources if s.citation_id == doi and s.source_type == 'ASH'] + if len(matching_sources) == 0: + raise Exception("No ASH sources with DOI: {}".format(doi)) + return matching_sources[0] + + +def get_asco_source_by_id(asco_id): + """ + :param str asco_id: An ASCO Web ID. This is the identification number found in the URL of the abstract. + :returns: A :class:`Source` object. + """ + sources = _get_elements_by_ids('source', get_all=True) + matching_sources = [s for s in sources if s.citation_id == asco_id and s.source_type == 'ASCO'] + if len(matching_sources) == 0: + raise Exception("No ASCO sources with ID: {}".format(asco_id)) + return matching_sources[0] + + +# Disease + +def get_disease_by_doid(doid): + """ + :param str doid: A single `Disease Ontology ID`_. + :returns: A :class:`Disease` object. + + .. _Disease Ontology ID: https://disease-ontology.org/ + """ + diseases = _get_elements_by_ids('disease', get_all=True) + matching_diseases = [d for d in diseases if d.doid == doid] + if len(matching_diseases) == 0: + raise Exception("No diseases with DO ID: {}".format(doid)) + return matching_diseases[0] + + +def get_disease_by_name(name): + """ + :param str name: A single `Disease Ontology`_ name. + :returns: A :class:`Disease` object. + + ..
_Disease Ontology: https://disease-ontology.org/ + """ + diseases = _get_elements_by_ids('disease', get_all=True) + matching_diseases = [d for d in diseases if d.name == name] + if len(matching_diseases) == 0: + raise Exception("No diseases with DO name: {}".format(name)) + return matching_diseases[0] + + +# Therapy + +def get_therapy_by_ncit_id(ncit_id): + """ + :param str ncit_id: A single `NCIthesaurus ID`_. + :returns: A :class:`Therapy` object. + + .. _NCIthesaurus ID: https://ncithesaurus.nci.nih.gov/ncitbrowser/ + """ + therapies = _get_elements_by_ids('therapy', get_all=True) + matching_therapies = [t for t in therapies if t.ncit_id == ncit_id] + if len(matching_therapies) == 0: + raise Exception("No therapies with NCIt ID: {}".format(ncit_id)) + return matching_therapies[0] + + +def get_therapy_by_name(name): + """ + :param str name: A single `NCIthesaurus`_ name. + :returns: A :class:`Therapy` object. + + .. _NCIthesaurus: https://ncithesaurus.nci.nih.gov/ncitbrowser/ + """ + therapies = _get_elements_by_ids('therapy', get_all=True) + matching_therapies = [t for t in therapies if t.name == name] + if len(matching_therapies) == 0: + raise Exception("No therapies with NCIt name: {}".format(name)) + return matching_therapies[0] + + +# Phenotype + +def get_phenotype_by_hpo_id(hpo_id): + """ + :param str hpo_id: A single `Human Phenotype Ontology ID`_. + :returns: A :class:`Phenotype` object. + + .. _Human Phenotype Ontology ID: https://hpo.jax.org/ + """ + phenotypes = _get_elements_by_ids('phenotype', get_all=True) + matching_phenotypes = [p for p in phenotypes if p.hpo_id == hpo_id] + if len(matching_phenotypes) == 0: + raise Exception("No phenotypes with HPO ID: {}".format(hpo_id)) + return matching_phenotypes[0] + + +def get_phenotype_by_name(name): + """ + :param str name: A single `Human Phenotype Ontology`_ name (sometimes also referred to as HPO class). + :returns: A :class:`Phenotype` object. + + .. 
_Human Phenotype Ontology: https://hpo.jax.org/ + """ + phenotypes = _get_elements_by_ids('phenotype', get_all=True) + matching_phenotypes = [p for p in phenotypes if p.name == name] + if len(matching_phenotypes) == 0: + raise Exception("No phenotypes with name: {}".format(name)) + return matching_phenotypes[0] diff --git a/civicpy/cli.py b/civicpy/cli.py index e98701c..8cef2ce 100644 --- a/civicpy/cli.py +++ b/civicpy/cli.py @@ -36,7 +36,7 @@ def create_vcf(vcf_file_path, include_status): """Create a VCF file of CIViC variants""" with open(vcf_file_path, "w") as fh: writer = VCFWriter(fh) - for variant in civic.get_all_variants(include_status=include_status): + for variant in civic.get_all_gene_variants(include_status=include_status): if variant.is_valid_for_vcf(): writer.addrecord(variant) writer.writerecords() diff --git a/civicpy/data/test_cache.pkl b/civicpy/data/test_cache.pkl index 2cfd692..bc66823 100644 Binary files a/civicpy/data/test_cache.pkl and b/civicpy/data/test_cache.pkl differ diff --git a/civicpy/graphql_payloads.py b/civicpy/graphql_payloads.py new file mode 100644 index 0000000..60dcbe1 --- /dev/null +++ b/civicpy/graphql_payloads.py @@ -0,0 +1,733 @@ +def _construct_get_gene_payload(): + return """ + query gene($id: Int!) { + gene(id: $id) { + id + name + description + entrez_id: entrezId + aliases: featureAliases + sources { + id + } + } + }""" + + +def _construct_get_all_genes_payload(): + return """ + query genes($after: String) { + genes(after: $after, evidenceStatusFilter: ALL) { + totalCount + pageInfo { + hasNextPage + endCursor + } + nodes { + id + name + description + entrez_id: entrezId + aliases: featureAliases + sources { + id + } + } + } + }""" + + +def _construct_get_factor_payload(): + return """ + query factor($id: Int!) 
{ + factor(id: $id) { + name + full_name: fullName + description + ncit_id: ncitId + aliases: featureAliases + sources { + id + } + } + }""" + + +def _construct_get_all_factors_payload(): + return """ + query factors($after: String) { + factors(after: $after, evidenceStatusFilter: ALL) { + totalCount + pageInfo { + hasNextPage + endCursor + } + nodes { + id + name + full_name: fullName + description + ncit_id: ncitId + aliases: featureAliases + sources { + id + } + } + } + }""" + + +def _construct_get_fusion_payload(): + return """ + query fusion($id: Int!) { + fusion(id: $id) { + id + name + description + threePrimeGene { + id + } + fivePrimeGene { + id + } + three_prime_partner_status: threePrimePartnerStatus + five_prime_partner_status: fivePrimePartnerStatus + aliases: featureAliases + sources { + id + } + } + }""" + + +def _construct_get_all_fusions_payload(): + return """ + query fusions($after: String) { + fusions(after: $after, evidenceStatusFilter: ALL) { + totalCount + pageInfo { + hasNextPage + endCursor + } + nodes { + id + name + description + threePrimeGene { + id + } + fivePrimeGene { + id + } + three_prime_partner_status: threePrimePartnerStatus + five_prime_partner_status: fivePrimePartnerStatus + aliases: featureAliases + sources { + id + } + } + } + }""" + + +def _construct_get_molecular_profile_payload(): + return """ + query molecularProfile($id: Int!) { + molecular_profile: molecularProfile(id: $id) { + id + description + molecular_profile_score: molecularProfileScore + name + variants { + id + } + aliases: molecularProfileAliases + parsed_name: parsedName { + type: __typename + ... on MolecularProfileTextSegment { + text + } + ... on Feature { + id + name + featureType + } + ... 
on Variant { + id + name + deprecated + } + } + sources { + id + } + } + }""" + + + +def _construct_get_all_molecular_profiles_payload(): + return """ + query molecularProfiles($after: String) { + molecular_profiles: molecularProfiles(after: $after, evidenceStatusFilter: ALL) { + totalCount + pageInfo { + hasNextPage + endCursor + } + nodes { + id + description + molecular_profile_score: molecularProfileScore + name + variants { + id + } + aliases: molecularProfileAliases + parsed_name: parsedName { + type: __typename + ... on MolecularProfileTextSegment { + text + } + ... on Feature { + id + name + featureType + } + ... on Variant { + id + name + deprecated + } + } + sources { + id + } + } + } + }""" + + +def _construct_get_variant_payload(): + return """ + query variant($id: Int!) { + variant(id: $id) { + __typename + id + name + ... on GeneVariant { + allele_registry_id: alleleRegistryId + clinvar_entries: clinvarIds + hgvs_expressions: hgvsDescriptions + coordinates { + reference_build: referenceBuild + ensembl_version: ensemblVersion + chromosome + representative_transcript: representativeTranscript + start + stop + reference_bases: referenceBases + variant_bases: variantBases + } + } + ... on FactorVariant { + ncit_id: ncitId + } + feature { + id + name + featureInstance { + ... on Gene { + entrezId + } + } + } + single_variant_molecular_profile_id: singleVariantMolecularProfileId + variant_aliases: variantAliases + variant_types: variantTypes { + id + name + so_id: soid + description + url + } + } + }""" + + +def _construct_get_all_variants_payload(): + return """ + query variants($after: String) { + variants(after: $after) { + totalCount + pageInfo { + hasNextPage + endCursor + } + nodes { + __typename + id + name + ... 
on GeneVariant { + allele_registry_id: alleleRegistryId + clinvar_entries: clinvarIds + hgvs_expressions: hgvsDescriptions + coordinates { + reference_build: referenceBuild + ensembl_version: ensemblVersion + chromosome + representative_transcript: representativeTranscript + start + stop + reference_bases: referenceBases + variant_bases: variantBases + } + } + ... on FactorVariant { + ncit_id: ncitId + } + ... on FusionVariant { + vicc_compliant_name: viccCompliantName + five_prime_coordinates: fivePrimeCoordinates { + reference_build: referenceBuild + ensembl_version: ensemblVersion + chromosome + representative_transcript: representativeTranscript + start + stop + reference_bases: referenceBases + variant_bases: variantBases + } + three_prime_coordinates: threePrimeCoordinates { + reference_build: referenceBuild + ensembl_version: ensemblVersion + chromosome + representative_transcript: representativeTranscript + start + stop + reference_bases: referenceBases + variant_bases: variantBases + } + } + feature { + id + name + featureInstance { + ... on Gene { + entrezId + } + } + } + single_variant_molecular_profile_id: singleVariantMolecularProfileId + variant_aliases: variantAliases + variant_types: variantTypes { + id + name + so_id: soid + description + url + } + } + } + }""" + +def _construct_get_evidence_payload(): + return """ + query evidenceItem($id: Int!) 
{ + evidence: evidenceItem(id: $id) { + id + name + significance + description + therapy_interaction_type: therapyInteractionType + evidence_direction: evidenceDirection + evidence_level: evidenceLevel + evidence_type: evidenceType + status + variant_origin: variantOrigin + molecular_profile: molecularProfile { + id + } + disease { + id + } + therapies { + id + } + phenotypes { + id + } + assertions { + id + } + source { + id + } + rating: evidenceRating + } + }""" + + + +def _construct_get_all_evidence_payload(): + return """ + query evidenceItems($after: String) { + evidence_items: evidenceItems(after: $after, status: ALL) { + totalCount + pageInfo { + hasNextPage + endCursor + } + nodes { + id + name + significance + description + therapy_interaction_type: therapyInteractionType + evidence_direction: evidenceDirection + evidence_level: evidenceLevel + evidence_type: evidenceType + status + variant_origin: variantOrigin + molecular_profile: molecularProfile { + id + } + disease { + id + } + therapies { + id + } + phenotypes { + id + } + assertions { + id + } + source { + id + } + rating: evidenceRating + } + } + }""" + + + +def _construct_get_assertion_payload(): + return """ + query assertion($id: Int!) 
{ + assertion(id: $id) { + id + name + amp_level: ampLevel + significance + description + therapy_interaction_type: therapyInteractionType + assertion_direction: assertionDirection + assertion_type: assertionType + fda_companion_test: fdaCompanionTest + fda_regulatory_approval: regulatoryApproval + name + nccn_guideline: nccnGuideline { + name + } + nccn_guideline_version: nccnGuidelineVersion + status + summary + variant_origin: variantOrigin + molecular_profile: molecularProfile { + id + } + acmg_codes: acmgCodes { + id + code + description + } + clingen_codes: clingenCodes { + id + code + description + } + disease { + id + } + therapies { + id + } + evidenceItems { + id + } + phenotypes { + id + } + } + }""" + + +def _construct_get_all_assertions_payload(): + return """ + query assertions($after: String) { + assertions(after: $after, status: ALL) { + totalCount + pageInfo { + hasNextPage + endCursor + } + nodes { + id + name + amp_level: ampLevel + significance + description + therapy_interaction_type: therapyInteractionType + assertion_direction: assertionDirection + assertion_type: assertionType + fda_companion_test: fdaCompanionTest + fda_regulatory_approval: regulatoryApproval + name + nccn_guideline: nccnGuideline { + name + } + nccn_guideline_version: nccnGuidelineVersion + status + summary + variant_origin: variantOrigin + molecular_profile: molecularProfile { + id + } + acmg_codes: acmgCodes { + id + code + description + } + clingen_codes: clingenCodes { + id + code + description + } + disease { + id + } + therapies { + id + } + evidenceItems { + id + } + phenotypes { + id + } + } + } + }""" + + +def _construct_get_variant_group_payload(): + return """ + query variantGroup($id: Int!) 
{ + variant_group: variantGroup(id: $id) { + id + name + description + variants(first: 100) { + nodes { + id + } + } + sources { + id + } + } + }""" + + +def _construct_get_all_variant_groups_payload(): + return """ + query variantGroups($after: String) { + variant_groups: variantGroups(after: $after) { + totalCount + pageInfo { + hasNextPage + endCursor + } + nodes { + id + name + description + variants(first: 100) { + nodes { + id + } + } + sources { + id + } + } + } + }""" + + +def _construct_get_source_payload(): + return """ + query source($id: Int!) { + source(id: $id) { + id + name + title + citation + citation_id: citationId + source_type: sourceType + abstract + asco_abstract_id: ascoAbstractId + author_string: authorString + full_journal_title: fullJournalTitle + journal + pmc_id: pmcId + publication_date: publicationDate + source_url: sourceUrl + clinical_trials: clinicalTrials { + id + name + description + nctId + url + } + } + }""" + + +def _construct_get_all_sources_payload(): + return """ + query sources($after: String) { + sources(after: $after) { + totalCount + pageInfo { + hasNextPage + endCursor + } + nodes { + id + name + title + citation + citation_id: citationId + source_type: sourceType + abstract + asco_abstract_id: ascoAbstractId + author_string: authorString + full_journal_title: fullJournalTitle + journal + pmc_id: pmcId + publication_date: publicationDate + source_url: sourceUrl + clinical_trials: clinicalTrials { + id + name + description + nctId + url + } + } + } + }""" + + +def _construct_get_disease_payload(): + return """ + query disease($id: Int!) 
{ + disease(id: $id) { + id + name + doid + disease_url: diseaseUrl + aliases: diseaseAliases + } + }""" + + +def _construct_get_all_diseases_payload(): + return """ + query diseases($after: String) { + diseases(after: $after) { + totalCount + pageInfo { + hasNextPage + endCursor + } + nodes { + id + name + doid + disease_url: diseaseUrl + aliases: diseaseAliases + } + } + } + """ + + +def _construct_get_therapy_payload(): + return """ + query therapy($id: Int!) { + therapy(id: $id) { + id + name + ncit_id: ncitId + aliases: therapyAliases + therapy_url: therapyUrl + } + }""" + + +def _construct_get_all_therapies_payload(): + return """ + query therapies($after: String) { + therapies(after: $after) { + totalCount + pageInfo { + hasNextPage + endCursor + } + nodes { + id + name + ncit_id: ncitId + aliases: therapyAliases + therapy_url: therapyUrl + } + } + } + """ + + +def _construct_get_phenotype_payload(): + return """ + query phenotype($id: Int!) { + phenotype(id: $id) { + id + name + hpo_id: hpoId + phenotype_url: url + } + }""" + + +def _construct_get_all_phenotypes_payload(): + return """ + query phenotypes($after: String) { + phenotypes(after: $after) { + totalCount + pageInfo { + hasNextPage + endCursor + } + nodes { + id + name + hpo_id: hpoId + phenotype_url: url + } + } + } + """ diff --git a/civicpy/tests/test_civic.py b/civicpy/tests/test_civic.py index 368820d..827101f 100644 --- a/civicpy/tests/test_civic.py +++ b/civicpy/tests/test_civic.py @@ -84,7 +84,6 @@ def test_get_source_ids(self, v600e_mp): assert source.publication_date assert source.source_url assert source.title - assert source.name assert hasattr(source, 'clinical_trials') def test_get_all(self): @@ -108,12 +107,17 @@ def test_properties(self, v600e_mp): evidence = v600e_mp.evidence[0] assert evidence.molecular_profile.name == 'BRAF V600E' assert evidence.statement == evidence.description + assert len(evidence.assertions) == 0 + assert evidence.disease.name == 'Skin Melanoma' + assert 
len(evidence.therapies) == 1 + assert len(evidence.phenotypes) == 0 + class TestVariants(object): def test_get_all(self): variants = civic.get_all_variants() - assert len(variants) >= 2396 + assert len(variants) >= 3815 def test_get_non_rejected(self): variants = civic.get_all_variants(include_status=['accepted', 'submitted']) @@ -150,16 +154,92 @@ def test_sanitize_coordinate_bases(self): assert v.coordinates.reference_bases not in ['', '-'] assert v.coordinates.variant_bases not in ['', '-'] - def test_properties(self): + def test_shared_properties(self): variant = civic.get_variant_by_id(11) assert sorted(variant.aliases) == sorted(variant.variant_aliases) assert sorted(variant.groups) == sorted(variant.variant_groups) assert sorted(variant.types) == sorted(variant.variant_types) + assert len(variant.molecular_profiles) == 1 + assert variant.single_variant_molecular_profile.id == 11 + + +class TestGeneVariants(object): + + def test_get_all(self): + variants = civic.get_all_gene_variants() + assert len(variants) >= 3522 + for variant in variants: + assert variant.subtype == 'gene_variant' + + def test_get_by_id(self): + variant = civic.get_variant_by_id(11) + assert variant.id == 11 + assert variant.type == 'variant' + + def test_attributes(self): + variant = civic.get_variant_by_id(11) assert variant.coordinates.ensembl_version == 75 assert variant.entrez_name == "BRAF" assert variant.entrez_id == 673 + def test_properties(self): + variant = civic.get_variant_by_id(11) + assert variant.gene.id == 5 + assert variant.gene == variant.feature + assert variant.is_insertion == False + assert variant.is_deletion == False + + +class TestFusionVariants(object): + + def test_get_all(self): + variants = civic.get_all_fusion_variants() + assert len(variants) >= 263 + for variant in variants: + assert variant.subtype == 'fusion_variant' + + def test_get_by_id(self): + variant = civic.get_variant_by_id(1) + assert variant.id == 1 + assert variant.type == 'variant' + + def 
test_attributes(self): + variant = civic.get_variant_by_id(1) + assert variant.vicc_compliant_name == 'BCR(entrez:613)::ABL1(entrez:25)' + assert variant.five_prime_coordinates.reference_build == 'GRCH37' + assert variant.three_prime_coordinates.reference_build == 'GRCH37' + + def test_properties(self): + variant = civic.get_variant_by_id(1) + assert variant.fusion.id == 61802 + assert variant.fusion == variant.feature + + +class TestFactorVariants(object): + + def test_get_all(self): + variants = civic.get_all_factor_variants() + assert len(variants) >= 8 + for variant in variants: + assert variant.subtype == 'factor_variant' + + def test_get_by_id(self): + variant = civic.get_variant_by_id(4985) + assert variant.id == 4985 + assert variant.type == 'variant' + + def test_attributes(self): + variant = civic.get_variant_by_id(4985) + assert variant.ncit_id == 'C131459' + + def test_properties(self): + variant = civic.get_variant_by_id(4985) + assert variant.factor.id == 61746 + assert variant.factor == variant.feature + + class TestMolecularProfiles(object): + def test_get_all(self): mps = civic.get_all_molecular_profiles() assert len(mps) >= 2396 @@ -183,23 +263,30 @@ def test_get_by_id_complex_mp(self): mp_parsed_name = mp.parsed_name assert len(mp_parsed_name) == 5 egfr_gene = mp_parsed_name[0] - assert egfr_gene.type == "feature" + assert egfr_gene.type == "gene" assert egfr_gene.id == 19 assert egfr_gene.name == "EGFR" variant0 = mp_parsed_name[1] assert variant0.type == "variant" assert variant0.id == 33 assert variant0.name == "L858R" - assert variant0.deprecated is False text_segment = mp_parsed_name[2] - assert text_segment.type == "molecular_profile_text_segment" - assert text_segment.text == "OR" + assert text_segment == "OR" assert mp_parsed_name[3] == egfr_gene variant1 = mp_parsed_name[4] assert variant1.type == "variant" assert variant1.id == 133 assert variant1.name == "Exon 19 Deletion" - assert variant1.deprecated is False + + def 
test_properties(self): + mp = civic.get_molecular_profile_by_id(4432) + assert len(mp.evidence_sources) == 11 + assert mp.summary == mp.description + assert len(mp.evidence_items) == 12 + assert len(mp.assertions) == 0 + assert len(mp.variants) == 2 + assert len(mp.sources) == 0 + class TestVariantGroups(object): @@ -212,6 +299,11 @@ def test_get_by_id(self): assert variant_group.type == 'variant_group' assert variant_group.id == 1 + def test_properties(self): + variant_group = civic.get_variant_group_by_id(1) + assert len(variant_group.variants) == 7 + assert len(variant_group.sources) == 0 + class TestAssertions(object): @@ -245,6 +337,10 @@ def test_properties(self): assert acmg_code.id assert acmg_code.code assert acmg_code.description + assert assertion.disease.name == "Von Hippel-Lindau Disease" + assert len(assertion.therapies) == 0 + assert len(assertion.phenotypes) == 3 + assert assertion.molecular_profile.id == 1686 # Test assertion with clingen_codes assertion = civic.get_assertion_by_id(53) @@ -255,6 +351,39 @@ def test_properties(self): assert clingen_code.description +class TestFeatures(object): + + def test_get_all(self): + features = civic.get_all_features() + assert len(features) >= 407 + + def test_get_non_rejected(self): + features = civic.get_all_features(include_status=['accepted', 'submitted']) + assert len(features) >= 402 + + def test_get_accepted_only(self): + features = civic.get_all_features(include_status=['accepted']) + assert len(features) >= 322 + + def test_get_by_id(self): + feature = civic.get_feature_by_id(58) + assert feature.type == 'gene' + assert feature.id == 58 + + def test_get_by_ids(self): + features = civic.get_features_by_ids([58, 61748, 61753]) + assert features[0].type == 'gene' + assert features[0].id == 58 + assert features[1].type == 'factor' + assert features[1].id == 61748 + assert features[2].type == 'fusion' + assert features[2].id == 61753 + + def test_attributes(self): + feature = civic.get_feature_by_id(58) + 
assert feature.name == 'VHL' + + class TestGenes(object): def test_get_all(self): @@ -273,8 +402,181 @@ def test_get_by_id(self): gene = civic.get_gene_by_id(58) assert gene.type == 'gene' assert gene.id == 58 + + def test_attributes(self): + gene = civic.get_gene_by_id(58) assert gene.name == 'VHL' + def test_properties(self): + gene = civic.get_gene_by_id(58) + assert len(gene.variants) == 844 + assert len(gene.sources) == 4 + + +class TestFactors(object): + + def test_get_all(self): + factors = civic.get_all_factors() + assert len(factors) >= 6 + + def test_get_non_rejected(self): + factors = civic.get_all_factors(include_status=['accepted', 'submitted']) + assert len(factors) >= 6 + + def test_get_accepted_only(self): + factors = civic.get_all_factors(include_status=['accepted']) + assert len(factors) >= 2 + + def test_get_by_id(self): + factor = civic.get_factor_by_id(61748) + assert factor.type == 'factor' + assert factor.id == 61748 + + def test_attributes(self): + factor = civic.get_factor_by_id(61748) + assert factor.name == 'CK' + assert factor.full_name == 'Complex Karyotype' + + def test_properties(self): + factor = civic.get_factor_by_id(61748) + assert len(factor.variants) == 1 + assert len(factor.sources) == 0 + + +class TestFusions(object): + + def test_get_all(self): + fusions = civic.get_all_fusions() + assert len(fusions) >= 256 + + def test_get_non_rejected(self): + fusions = civic.get_all_fusions(include_status=['accepted', 'submitted']) + assert len(fusions) >= 255 + + def test_get_accepted_only(self): + fusions = civic.get_all_fusions(include_status=['accepted']) + assert len(fusions) >= 166 + + def test_get_by_id(self): + fusion = civic.get_fusion_by_id(61753) + assert fusion.type == 'fusion' + assert fusion.id == 61753 + + def test_attributes(self): + fusion = civic.get_fusion_by_id(61753) + assert fusion.name == 'MEF2D::CSF1R' + assert fusion.five_prime_gene.name == 'MEF2D' + assert fusion.three_prime_gene.name == 'CSF1R' + + def 
test_properties(self): + fusion = civic.get_fusion_by_id(61753) + assert len(fusion.variants) == 1 + assert len(fusion.sources) == 0 + + +class TestDiseases(object): + + def test_get_all(self): + diseases = civic.get_all_diseases() + assert len(diseases) >= 419 + + def test_get_by_id(self): + d = civic.get_disease_by_id(22) + assert d.id == 22 + assert d.type == 'disease' + + def test_attributes(self): + breast_cancer = civic.get_disease_by_id(22) + assert breast_cancer.doid == '1612' + assert breast_cancer.name == 'Breast Cancer' + assert set(breast_cancer.aliases) == { + 'Breast Tumor', + 'Malignant Neoplasm Of Breast', + 'Malignant Tumor Of The Breast', + 'Mammary Cancer', + 'Mammary Tumor', + 'Primary Breast Cancer' + } + + def test_get_by_name(self): + breast_cancer = civic.get_disease_by_name('Breast Cancer') + assert breast_cancer.id == 22 + + def test_get_by_doid(self): + breast_cancer = civic.get_disease_by_doid('1612') + assert breast_cancer.id == 22 + + def test_properties(self): + breast_cancer = civic.get_disease_by_id(22) + assert len(breast_cancer.evidence) == 280 + assert breast_cancer.evidence == breast_cancer.evidence_items + assert len(breast_cancer.assertions) == 2 + + +class TestTherapies(object): + + def test_get_all(self): + therapies = civic.get_all_therapies() + assert len(therapies) >= 555 + + def test_get_by_id(self): + t = civic.get_therapy_by_id(19) + assert t.id == 19 + assert t.type == 'therapy' + + def test_attributes(self): + trametinib = civic.get_therapy_by_id(19) + assert trametinib.ncit_id == 'C77908' + assert trametinib.name == 'Trametinib' + assert set(trametinib.aliases) == { + 'JTP-74057', + 'GSK1120212', + 'MEK Inhibitor GSK1120212', + 'Mekinist', + 'N-(3-{3-cyclopropyl-5-[(2-fluoro-4-iodophenyl)amino]-6,8-dimethyl-2,4,7-trioxo-3,4,6,7-tetrahydropyrido[4,3-d]pyrimidin-1(2H)-yl}phenyl)acetamide' + } + + def test_get_by_name(self): + trametinib = civic.get_therapy_by_name('Trametinib') + assert trametinib.id == 19 + + def 
test_get_by_ncit_id(self): + trametinib = civic.get_therapy_by_ncit_id('C77908') + assert trametinib.id == 19 + + def test_properties(self): + trametinib = civic.get_therapy_by_id(19) + assert len(trametinib.evidence) == 138 + assert trametinib.evidence == trametinib.evidence_items + assert len(trametinib.assertions) == 3 + + +class TestPhenotypes(object): + + def test_get_all(self): + phenotypes = civic.get_all_phenotypes() + assert len(phenotypes) >= 265 + + def test_get_by_id(self): + pediatric_onset = civic.get_phenotype_by_id(15320) + assert pediatric_onset.hpo_id == 'HP:0410280' + assert pediatric_onset.name == 'Pediatric onset' + + def test_get_by_name(self): + pediatric_onset = civic.get_phenotype_by_name('Pediatric onset') + assert pediatric_onset.id == 15320 + + def test_get_by_hpo_id(self): + pediatric_onset = civic.get_phenotype_by_hpo_id('HP:0410280') + assert pediatric_onset.id == 15320 + + def test_properties(self): + pediatric_onset = civic.get_phenotype_by_id(15320) + assert len(pediatric_onset.evidence) == 140 + assert pediatric_onset.evidence == pediatric_onset.evidence_items + assert len(pediatric_onset.assertions) == 27 + + class TestCoordinateSearch(object): def test_search_assertions(self): @@ -288,6 +590,9 @@ def test_search_assertions(self): assertion_ids = [x.id for x in assertions] assert set(assertion_ids) >= set(v600e_assertion_ids) + def test_search_evidence(self): + pass + def test_single_and_bulk_exact_return_same_variants(self): query = CoordinateQuery('7', 140453136, 140453136, 'T', '*') variants_single = civic.search_variants_by_coordinates(query, search_mode='exact') @@ -329,21 +634,21 @@ def test_single_and_bulk_exact_return_same_variants(self): assert len(variants_single) == 0 assert len(variants_bulk) == 0 - query = CoordinateQuery('3', 10183706, 10183706, None, 'C') + query = CoordinateQuery('3', 10183694, 10183694, None, 'G') variants_single = civic.search_variants_by_coordinates(query, search_mode='exact') variants_bulk = 
civic.bulk_search_variants_by_coordinates([query], search_mode='exact') assert len(variants_single) == 1 assert len(variants_bulk[query]) == 1 assert hash(variants_single[0]) == variants_bulk[query][0].v_hash - query = CoordinateQuery('3', 10183706, 10183706, 'T', 'C') + query = CoordinateQuery('3', 10183694, 10183694, 'T', 'G') variants_single = civic.search_variants_by_coordinates(query, search_mode='exact') variants_bulk = civic.bulk_search_variants_by_coordinates([query], search_mode='exact') assert len(variants_single) == 1 assert len(variants_bulk[query]) == 1 assert hash(variants_single[0]) == variants_bulk[query][0].v_hash - query = CoordinateQuery('3', 10183706, 10183706, '*', 'C') + query = CoordinateQuery('3', 10183694, 10183694, '*', 'G') variants_single = civic.search_variants_by_coordinates(query, search_mode='exact') variants_bulk = civic.bulk_search_variants_by_coordinates([query], search_mode='exact') variants_single = list(map(lambda v: hash(v), variants_single)) @@ -359,8 +664,8 @@ def test_bulk_any_search_variants(self): CoordinateQuery('7', 140453136, 140453137, 'TT') ] search_results = civic.bulk_search_variants_by_coordinates(sorted_queries, search_mode='any') - assert len(search_results[sorted_queries[0]]) == 12 - assert len(search_results[sorted_queries[1]]) >= 13 + assert len(search_results[sorted_queries[0]]) >= 17 + assert len(search_results[sorted_queries[1]]) >= 11 def test_bulk_exact_search_variants(self): sorted_queries = [ @@ -382,7 +687,7 @@ def test_bulk_qe_search_variants(self): ] search_results = civic.bulk_search_variants_by_coordinates(sorted_queries, search_mode='query_encompassing') assert len(search_results[sorted_queries[0]]) == 1 - assert len(search_results[sorted_queries[1]]) == 6 + assert len(search_results[sorted_queries[1]]) >= 5 def test_bulk_re_search_variants(self): sorted_queries = [ @@ -390,8 +695,8 @@ def test_bulk_re_search_variants(self): CoordinateQuery('7', 140453136, 140453137) ] search_results = 
civic.bulk_search_variants_by_coordinates(sorted_queries, search_mode='record_encompassing') - assert len(search_results[sorted_queries[0]]) == 12 - assert len(search_results[sorted_queries[1]]) == 9 + assert len(search_results[sorted_queries[0]]) >= 17 + assert len(search_results[sorted_queries[1]]) >= 14 def test_build38_exact_search_variants(self, v600e): query = CoordinateQuery('7', 140753336, 140753336, 'T', 'A', 'GRCh38') @@ -406,7 +711,7 @@ def test_build38_exact_search_variants(self, v600e): query = CoordinateQuery('3', 10146548, 10146549, 'C', None, 'GRCh38') search_results = civic.search_variants_by_coordinates(query, search_mode='exact') - assert len(search_results) == 1 + assert len(search_results) >= 1 assert search_results[0].id == 1918 query = CoordinateQuery('3', 10146618, 10146618, None, 'G', 'GRCh38') @@ -463,21 +768,6 @@ def test_errors(self): assert "Unexpected ref `-` in coordinate query. Did you mean `None`?" in str(context.value) -class TestTherapies(object): - - def test_has_ncit_id(self, v600e_assertion): - trametinib = v600e_assertion.therapies[0] - assert trametinib.ncit_id == 'C77908' - assert 'pubchem_id' not in trametinib.keys() - assert trametinib.name == 'Trametinib' - assert set(trametinib.aliases) == { - 'JTP-74057', - 'GSK1120212', - 'MEK Inhibitor GSK1120212', - 'Mekinist', - 'N-(3-{3-cyclopropyl-5-[(2-fluoro-4-iodophenyl)amino]-6,8-dimethyl-2,4,7-trioxo-3,4,6,7-tetrahydropyrido[4,3-d]pyrimidin-1(2H)-yl}phenyl)acetamide' - } - #warning logging tests LOGGER = logging.getLogger(__name__) diff --git a/civicpy/utils.py b/civicpy/utils.py new file mode 100644 index 0000000..210570d --- /dev/null +++ b/civicpy/utils.py @@ -0,0 +1,33 @@ +UNMARKED_PLURALS = {'evidence'} + +def pluralize(string): + if string == 'therapy': + return 'therapies' + if string in UNMARKED_PLURALS: + return '{}_items'.format(string) + if string.endswith('s'): + return string + return string + 's' + + +def singularize(string): + string = string.rstrip('s') + if 
string == 'evidence_item': + string = 'evidence' + elif string == 'therapie': + string = 'therapy' + return string + + +def search_url(element, use_search_meta): + element = pluralize(element).lower() + components = [API_URL, element] + if use_search_meta: + components.append('search') + return '/'.join(components) + + +def snake_to_camel(snake_string): + words = snake_string.split('_') + cap_words = [x.capitalize() for x in words] + return ''.join(cap_words) diff --git a/docs/civic.rst b/docs/civic.rst index 2f42553..51e5e02 100644 --- a/docs/civic.rst +++ b/docs/civic.rst @@ -5,13 +5,13 @@ The **civic** module CIViCpy is primarily designed to enable exploration of the content of CIViC through Python :class:`CivicRecord` objects. While these record objects can be initialized independently, the **civic** module also provides several routines for -`getting records`_ directly from CIViC. Use of these routines is recommended. +:ref:`getting_records` directly from CIViC. Use of these routines is recommended. The **civic** module may be imported from **civicpy** at the top level:: >>>from civicpy import civic -CIViC records +CIViC Records ------------- .. autoclass:: CivicRecord @@ -28,26 +28,19 @@ CIViC records The record ID. This is set on initialization using the `id` keyword argument, and reflects the primary ID for the record as stored in CIViC. - .. attribute:: site_link +The primary CIViC records are found on the sidebar menu on CIViC, and are fully-formed. - A URL string to the appropriate landing page for the CivicRecord on the CIViC web application. - -CIViC record types -~~~~~~~~~~~~~~~~~~ - -The primary CIViC records are found on the CIViC advanced search page, and are fully-formed +Gene +^^^^ .. autoclass:: Gene :show-inheritance: + :members: .. attribute:: aliases A list of alternate gene symbols by which this gene is referenced. - .. attribute: assertions - - A list of :class:`Assertion` records that this gene is involved in. - .. 
attribute:: description A curated summary of the clinical significance of this gene. @@ -60,20 +53,135 @@ The primary CIViC records are found on the CIViC advanced search page, and are f The `HGNC Gene Symbol`_ associated with this gene. - .. attribute:: sources +.. _Entrez ID: https://www.ncbi.nlm.nih.gov/gene/ - A list of :class:`CivicAttribute` source objects associated with the gene description. +.. _HGNC Gene Symbol: https://www.genenames.org/ - .. attribute:: variants +Factor +^^^^^^ - A list of :class:`Variant` records associated with this gene. +.. autoclass:: Factor + :show-inheritance: + :members: -.. _Entrez ID: https://www.ncbi.nlm.nih.gov/gene/ + .. attribute:: aliases + + A list of alternate names by which this factor is referenced. + + .. attribute:: description + + A curated summary of the clinical significance of this factor. + + .. attribute:: full_name + + Factor names are often commonly used abbreviations. The full name is + the unabbreviated name. + + .. attribute:: name + + The shortest, most concise reference to the factor. Often an + abbreviation. + + .. attribute:: ncit_id + + The `NCIthesaurus ID`_ referencing the factor. + +.. _NCIthesaurus ID: https://ncithesaurus.nci.nih.gov/ncitbrowser/ + +Fusion +^^^^^^ + +.. autoclass:: Fusion + :show-inheritance: + :members: + + .. attribute:: subtype + + .. attribute:: aliases + + A list of alternate names by which this fusion is referenced. + + .. attribute:: description + + A curated summary of the clinical significance of this fusion. + + .. attribute:: five_prime_gene_id + + The :attr:`CivicRecord.id` of the 5' fusion partner :class:`Gene` if that partner is + ``KNOWN``. + + .. attribute:: five_prime_partner_status + + The status of the 5' fusion partner. One of ``KNOWN``, ``UNKNOWN``, or + ``MULTIPLE``. + + .. attribute:: name + + The name of the fusion. This will be the 5' partner, followed by the 3' + partner, separated by ``::``. 
If a partner is ``KNOWN``, the `HGNC Gene Symbol`_ + of the partner gene is used. If the partner is ``UNKNOWN``, + a ``?`` is used. If there are ``MULTIPLE`` possible gene partners, + ``v`` is used. + + .. attribute:: three_prime_gene_id + + The :attr:`CivicRecord.id` of the 3' fusion partner :class:`Gene` if that partner is + ``KNOWN``. + + .. attribute:: three_prime_partner_status + + The status of the 3' fusion partner. One of ``KNOWN``, ``UNKNOWN``, or + ``MULTIPLE``. .. _HGNC Gene Symbol: https://www.genenames.org/ + +Variant +^^^^^^^ + .. autoclass:: Variant :show-inheritance: + :members: + + .. attribute:: feature_id + + The :attr:`CivicRecord.id` of the :class:`Gene`, :class:`Factor`, or + :class:`Fusion` the variant belongs to. + + .. attribute:: name + + The curated name given to this variant. + + .. attribute:: single_variant_molecular_profile_id + + The :attr:`CivicRecord.id` of the :class:`MolecularProfile` representing the single + variant on its own. + + .. attribute:: subtype + + The specific type of variant. One of ``gene_variant``, + ``factor_variant``, or ``fusion_variant``. + + .. attribute:: variant_aliases + + A curated list of aliases by which this variant is referenced. + + .. attribute:: variant_types + + A list of :class:`CivicAttribute` objects describing `variant types`_ from the + `Sequence Ontology`_. + +.. _variant types: https://docs.civicdb.org/en/latest/model/variants/types.html + +.. _Sequence Ontology: http://www.sequenceontology.org/ + + +GeneVariant +""""""""""" + +.. autoclass:: GeneVariant + :show-inheritance: + :members: .. attribute:: allele_registry_id @@ -95,117 +203,117 @@ The primary CIViC records are found on the CIViC advanced search page, and are f The `HGNC Gene Symbol`_ of the gene this variant belongs to. - .. attribute:: gene + .. attribute:: hgvs_expressions - The :class:`Gene` this variant belongs to. + Curated `HGVS expressions`_ describing this variant. - .. attribute:: gene_id +.. 
_ClinGen Allele Registry ID: http://reg.clinicalgenome.org - The :attr:`CivicRecord.id` of the gene this variant belongs to. +.. _clinvar ids: https://www.ncbi.nlm.nih.gov/clinvar - .. attribute:: hgvs_expressions +.. _CIViC coordinates: https://docs.civicdb.org/en/latest/model/variants/coordinates.html - Curated `HGVS expressions`_ describing this variant. +.. _HGVS expressions: https://varnomen.hgvs.org - .. attribute:: name - The curated name given to this variant. +FactorVariant +""""""""""""" - .. attribute:: moleulcar_profiles +.. autoclass:: FactorVariant + :show-inheritance: + :members: - A list of :class:`MolecularProfile` objects of all the molecular - profiles involving this variant. + .. attribute:: ncit_id - .. attribute:: variant_aliases - aliases + The `NCIthesaurus ID`_ referencing the factor variant. - A curated list of aliases by which this variant is referenced. +.. _NCIthesaurus ID: https://ncithesaurus.nci.nih.gov/ncitbrowser/ - .. attribute:: variant_groups - groups - A list of `variant groups`_ to which this variant belongs. +FusionVariant +""""""""""""" - .. attribute:: variant_types - types +.. autoclass:: FusionVariant + :show-inheritance: + :members: - A list of :class:`CivicAttribute` objects describing `variant types`_ from the - `Sequence Ontology`_. + .. attribute:: five_prime_coordinates -.. _ClinGen Allele Registry ID: http://reg.clinicalgenome.org + A :class:`CivicAttribute` object describing `CIViC coordinates`_ of the + 5' fusion partner, if that partner is ``KNOWN``. -.. _clinvar ids: https://www.ncbi.nlm.nih.gov/clinvar + .. attribute:: three_prime_coordinates -.. _CIViC coordinates: https://docs.civicdb.org/en/latest/model/variants/coordinates.html + A :class:`CivicAttribute` object describing `CIViC coordinates`_ of the + 3' fusion partner, if that partner is ``KNOWN``. -.. _HGVS expressions: https://varnomen.hgvs.org + .. attribute:: vicc_compliant_name -.. 
_variant groups: https://docs.civicdb.org/en/latest/model/variant_groups.html + A name representing the fusion variant compliant with the `VICC fusion + specification`_. -.. _variant types: https://docs.civicdb.org/en/latest/model/variants/types.html +.. _CIViC coordinates: https://docs.civicdb.org/en/latest/model/variants/coordinates.html -.. _Sequence Ontology: http://www.sequenceontology.org/ +.. _VICC fusion specification: https://fusions.cancervariants.org/en/latest/nomenclature.html + + +MolecularProfile +^^^^^^^^^^^^^^^^ .. autoclass:: MolecularProfile :show-inheritance: + :members: - .. attribute: aliases + .. attribute:: aliases A curated list of aliases by which this molecular profile is referenced. - .. attribute: assertions - - A list of :class:`Assertion` records associated with this molecular - profile. - - .. attribute:: definition + .. attribute:: description A curated summary of the clinical significance of this molecular profile. - .. attribute: evidence_items - - A list of :class:`Evidence` associated with this molecular profile. - .. attribute:: molecular_profile_score - The CIViC `molcular profile score`_ associated with this molecular + The CIViC `molecular profile score`_ associated with this molecular profile. .. attribute:: name The human readable name of this molecular profile, including gene and variant names. - .. attribute: variant_ids + .. attribute:: source_ids - An list of integers designating the :attr:`CivicRecord.id` for the variants involved in this - molecular profile. + A list of integers designating the :attr:`CivicRecord.id` for the + :class:`Source` records associated with the molecular profile + description. - .. attribute: sources + .. attribute:: variant_ids - A list of :class:`CivicAttribute` source objects associated with the molecular profile description. + A list of integers designating the :attr:`CivicRecord.id` for the :class:`Variant` records involved in this + molecular profile. - .. attribute: variants +..
_molecular profile score: https://civic.readthedocs.io/en/latest/model/molecular_profiles/evidence_score.html - A list :class:`Variant` objects involved in this molecular profile. -.. _molecular profile score: https://civic.readthedocs.io/en/latest/model/molecular_profiles/evidence_score.html +Evidence +^^^^^^^^ .. autoclass:: Evidence :show-inheritance: + :members: - .. attribute:: assertions + .. attribute:: assertion_ids - CIViC :class:`Assertion` records containing this evidence. + The list of :attr:`CivicRecord.id` of :class:`Assertion` records this evidence is a part of. .. attribute:: description - statement The Evidence Statement (returned as `description` by the CIViC API) is a brief summary of the clinical implications of the :attr:`variant` in the context of the specific :attr:`disease`, :attr:`evidence_type`, and :attr:`significance` as curated from the cited literature source. - .. attribute:: disease + .. attribute:: disease_id - The cancer or cancer subtype context for the evidence record. + The :attr:`CivicRecord.id` of the :class:`Disease` record of the cancer or cancer subtype context for the evidence record. **None** for functional evidence_type. .. attribute:: evidence_direction @@ -220,22 +328,17 @@ The primary CIViC records are found on the CIViC advanced search page, and are f Category of clinical action/relevance implicated by event. Refer to the additional `documentation on evidence types`_ for details on how to enter evidence of each of the six types: Predictive, Prognostic, Predisposing, Diagnostic, Functional, and Oncogenic. - .. attribute:: molecular_profile - - The :class:`MolecularProfile` object this evidence item belongs to. - .. attribute:: molecular_profile_id - The :attr:`CivicRecord.id` of the molecular profile this evidence item belongs to. + The :attr:`CivicRecord.id` of the :class:`MolecularProfile` this evidence item belongs to. .. attribute:: name A system-generated unique identifier for the evidence record, e.g. `EID7`.
- .. attribute:: phenotypes + .. attribute:: phenotype_ids - Zero or more phenotype :class:`CivicAttribute`, linked to corresponding Human Phenotype Ontology (`HPO`_) terms - when applicable. + The list of :attr:`CivicRecord.id` of :class:`Phenotype` records linked to corresponding `Human Phenotype Ontology (HPO)`_ terms when applicable. .. attribute:: rating @@ -247,9 +350,9 @@ The primary CIViC records are found on the CIViC advanced search page, and are f the corresponding :attr:`evidence_type`. Please see `Understanding Significance`_ for more details on the expected values for this field. - .. attribute:: source + .. attribute:: source_id - A :class:`CivicAttribute` source object from which this evidence was derived. + The :attr:`CivicRecord.id` of the :class:`Source` object this evidence was derived from. .. attribute:: status @@ -259,30 +362,36 @@ The primary CIViC records are found on the CIViC advanced search page, and are f - *accepted*: This evidence has been reviewed and approved by a CIViC editor - *rejected*: This evidence has been reviewed and rejected by a CIViC editor - .. attribute:: therapies + .. attribute:: therapy_ids - Zero or more therapy :class:`CivicAttribute`, linked to corresponding `NCIT`_ terms when applicable. Only used with - therapeutic response predictive :attr:`evidence_type`. + The list of :attr:`CivicRecord.id` of the :class:`Therapy` objects this evidence item is linked to. Only used with therapeutic response predictive evidence_type. .. attribute:: therapy_interaction_type One of 'Combination', 'Sequential', or 'Substitutes', this field describes how multiple indicated therapies within a therapeutic response predictive :attr:`evidence_type` are related. +.. _Human Phenotype Ontology (HPO): https://hpo.jax.org/ + .. _Understanding Levels: https://civic.readthedocs.io/en/latest/model/evidence/level.html#understanding-levels .. 
_Understanding Evidence Ratings: https://civic.readthedocs.io/en/latest/model/evidence/evidence_rating.html#understanding-evidence-ratings + +Assertion +^^^^^^^^^ + .. autoclass:: Assertion :show-inheritance: + :members: .. attribute:: acmg_codes - Evidence codes used in the assessment of variants under the `ACMG/AMP`_ classification guidelines. + Evidence codes used in the assessment of germline variant pathogenicity under the `ACMG/AMP`_ classification guidelines. .. attribute:: amp_level - The clinical interpretation classification by `AMP/ASCO/CAP`_ or `ACMG/AMP`_ guidelines. + The clinical tiering of somatic variants by `AMP/ASCO/CAP`_ guidelines. .. attribute:: assertion_direction @@ -293,14 +402,22 @@ The primary CIViC records are found on the CIViC advanced search page, and are f Category of clinical action/relevance implicated by event. Refer to the additional `documentation on assertion types`_ for details on how to enter assertions of each of the five types: Predictive, Prognostic, Predisposing, Diagnostic, and Oncogenic. + .. attribute:: clingen_codes + + Classification of somatic variant oncogenicity under the `ClinGen/CGC/VICC`_ classification guidelines. + .. attribute:: description The Assertion Description gives detail including practice guidelines and approved tests for the molecular profile. See `curating assertions`_ for more details. - .. attribute:: disease + .. attribute:: disease_id + + The :attr:`CivicRecord.id` of the :class:`Disease` record of the cancer or cancer subtype context for the assertion record. + + .. attribute:: evidence_ids - A disease :class:`CivicAttribute`, linked to a corresponding `Disease Ontology`_ term when applicable. + A list of :attr:`CivicRecord.id` of the :class:`Evidence` records supporting this assertion record. ..
attribute:: fda_companion_test @@ -311,10 +428,6 @@ The primary CIViC records are found on the CIViC advanced search page, and are f A boolean indicating whether or not the therapies indicated in the assertion have regulatory approval for use in the treatment of the assertion disease. - .. attribute:: molecular_profile - - The :class:`MolecularProfile` object this assertion belongs to. - .. attribute:: molecular_profile_id The :attr:`CivicRecord.id` of the molecular profile this assertion belongs to. @@ -332,9 +445,9 @@ The primary CIViC records are found on the CIViC advanced search page, and are f The version associated with the indicated :attr:`nccn_guideline` document. - .. attribute:: phenotypes + .. attribute:: phenotype_ids - Zero or more phenotype :class:`CivicAttribute`, linked to corresponding Human Phenotype Ontology (`HPO`_) terms + Zero or more :class:`Phenotype` :attr:`CivicRecord.id`, linked to corresponding Human Phenotype Ontology (`HPO`_) terms when applicable. .. attribute:: significance @@ -357,11 +470,6 @@ The primary CIViC records are found on the CIViC advanced search page, and are f potential use in clinical reports. The Assertion Summary is designed for rapid communication of the Significance, especially when displayed in a longer list with other molecular profiles. - .. attribute:: therapies - - Zero or more therapy :class:`CivicAttribute`, linked to corresponding `NCIT`_ terms when applicable. Only used with - therapeutic response predictive :attr:`evidence_type`. - .. attribute:: therapy_interaction_type One of 'Combination', 'Sequential', or 'Substitutes', this field describes how multiple indicated therapies within @@ -375,13 +483,15 @@ The primary CIViC records are found on the CIViC advanced search page, and are f .. _ACMG/AMP: https://www.ncbi.nlm.nih.gov/pubmed/25741868 +.. _ClinGen/CGC/VICC: https://pubmed.ncbi.nlm.nih.gov/35101336/ + .. _curating assertions: https://docs.civicdb.org/en/latest/curating/assertions.html .. 
_Disease Ontology: http://disease-ontology.org/ .. _documentation on evidence types: https://docs.civicdb.org/en/latest/model/evidence/type.html -.. _documentation of assertion types: https://docs.civicdb.org/en/latest/model/assertions/overview.html +.. _documentation on assertion types: https://docs.civicdb.org/en/latest/model/assertions/overview.html .. _NCIT: https://ncit.nci.nih.gov/ncitbrowser/ @@ -391,68 +501,152 @@ The primary CIViC records are found on the CIViC advanced search page, and are f .. _Understanding Significance: https://civic.readthedocs.io/en/latest/model/evidence/significance.html#understanding-significance -CIViC attributes -~~~~~~~~~~~~~~~~ -The :class:`CivicAttribute` class is a special type of CivicRecord that is not indexed, and is used as a base -class for additional complex records beyond those mentioned above (e.g. diseases, therapies). CivicAttributes are not cached -except as attached objects to non-:class:`CivicAttribute` :class:`CivicRecord` objects, and cannot be retrieved -independently. +Source +^^^^^^ -.. autoclass:: CivicAttribute +.. autoclass:: Source + :members: + + .. attribute:: abstract + + The abstract text of the source. + + .. attribute:: asco_abstract_id + + For ASCO sources, the abstract ID. + + .. attribute:: author_string + + A string of all of the authors of the source or, for ASCO sources, the abstract presenter. + + .. attribute:: citation + + A short string containing key information about the source for human-readable + identification. + + .. attribute:: citation_id + + A unique identifier for the source. For PubMed sources, this is the + PMID. For ASH sources this is the DOI. For ASCO sources this is the + ASCO Web ID found in the URL of the abstract. + + .. attribute:: clinical_trials + + A list of `Clinical Trial`_ IDs described in the source. + + .. attribute:: full_journal_title + + The full title of the publishing journal. -Getting records ---------------- + .. 
attribute:: journal -By ID -~~~~~ + An abbreviated version of the title of the publishing journal. -Records can be obtained by ID through a collection of functions provided in the `civic` module. :class:`Gene` -objects can be queried by the following methods: + .. attribute:: pmc_id -.. autofunction:: get_gene_by_id -.. autofunction:: get_genes_by_ids -.. autofunction:: get_all_genes + When available, the `PubMed Central`_ ID of the source. -Analogous methods exist for :class:`Variant`, :class:`MolecularProfile`, :class:`Assertion`, and :class:`Evidence`: + .. attribute:: publication_date -.. autofunction:: get_variant_by_id -.. autofunction:: get_variants_by_ids -.. autofunction:: get_all_variants + The date the source was published. -.. autofunction:: get_molecular_profile_by_id -.. autofunction:: get_molecular_profiles_by_ids -.. autofunction:: get_all_molecular_profiles + .. attribute:: source_type -.. autofunction:: get_assertion_by_id -.. autofunction:: get_assertions_by_ids -.. autofunction:: get_all_assertions + The platform making the source available. One of ``PUBMED``, ``ASCO``, + or ``ASH``. -.. autofunction:: get_evidence_by_id -.. autofunction:: get_evidence_by_ids -.. autofunction:: get_all_evidence + .. attribute:: source_url -By Coordinate -~~~~~~~~~~~~~ + A link to the source on the platform that made the source available. -Variant records can be searched by GRCh37 coordinates. To query specific genomic coordinates, you will -need to construct a :class:`CoordinateQuery` object, and pass this query to the -:func:`search_variants_by_coordinates` function. If you wish to query multiple genomic coordinates (e.g. -a set of variants observed in a patient tumor), construct a sorted list of :class:`CoordinateQuery` objects -(sorted by `chr`, `start`, `stop`, `alt`), and pass the list to the :func:`bulk_search_variants_by_coordinates` -function. + .. attribute:: title -.. autoclass:: CoordinateQuery -.. autofunction:: search_variants_by_coordinates -..
autofunction:: bulk_search_variants_by_coordinates + The title of the source. -Coordinates can also be used to query :class:`Assertion` records: +.. _Clinical Trial: https://clinicaltrials.gov/ -.. autofunction:: search_assertions_by_coordinates +.. _PubMed Central: https://pmc.ncbi.nlm.nih.gov/ -By Other Attribute -~~~~~~~~~~~~~~~~~~ -.. autofunction:: search_variants_by_allele_registry_id -.. autofunction:: search_variants_by_hgvs -.. autofunction:: search_variants_by_name +Disease +^^^^^^^ + +.. autoclass:: Disease + :members: + + .. attribute:: aliases + + A list of alternate names for the disease. + + .. attribute:: disease_url + + A link to the `Disease Ontology`_ entry for the disease concept. + + .. attribute:: doid + + The `Disease Ontology`_ ID for the disease concept. + + .. attribute:: name + + The name of the disease. + +.. _Disease Ontology: http://disease-ontology.org/ + + +Therapy +^^^^^^^ + +.. autoclass:: Therapy + :members: + + .. attribute:: aliases + + A list of alternate names for the therapy. + + .. attribute:: name + + The name of the therapy. + + .. attribute:: ncit_id + + The `NCIthesaurus`_ ID for the therapy concept. + + .. attribute:: therapy_url + + A link to the `NCIthesaurus`_ entry for the therapy concept. + +.. _NCIthesaurus: https://ncithesaurus.nci.nih.gov/ncitbrowser/ + + +Phenotype +^^^^^^^^^ + +.. autoclass:: Phenotype + :members: + + .. attribute:: name + + The name of the phenotype. + + .. attribute:: hpo_id + + The `Human Phenotype Ontology`_ ID for the phenotype concept. + + .. attribute:: phenotype_url + + A link to the `Human Phenotype Ontology`_ entry for the phenotype concept. + +.. _Human Phenotype Ontology: https://hpo.jax.org/ + + +CIViC Attributes +^^^^^^^^^^^^^^^^ + +The :class:`CivicAttribute` class is a special type of CivicRecord that is not indexed, and is used as a base +class for additional complex records beyond those mentioned above (e.g. diseases, therapies). 
CivicAttributes are not cached +except as attached objects to non-:class:`CivicAttribute` :class:`CivicRecord` objects, and cannot be retrieved +independently. + +.. autoclass:: CivicAttribute + diff --git a/docs/getting_records.rst b/docs/getting_records.rst new file mode 100644 index 0000000..35ded74 --- /dev/null +++ b/docs/getting_records.rst @@ -0,0 +1,217 @@ +.. py:module:: civic + :noindex: + +.. _getting_records: + +Getting Records +=============== + +CIViCpy offers a wide range of convenience methods as part of the `civic` module to retrieve different +CIViC entities. + +Get All Records For A Specific Entity Type +------------------------------------------ + +Features +~~~~~~~~ + +.. autofunction:: get_all_features +.. autofunction:: get_all_genes +.. autofunction:: get_all_factors +.. autofunction:: get_all_fusions + +Variants +~~~~~~~~ + +.. autofunction:: get_all_variants +.. autofunction:: get_all_gene_variants +.. autofunction:: get_all_factor_variants +.. autofunction:: get_all_fusion_variants + +Molecular Profiles +~~~~~~~~~~~~~~~~~~ + +.. autofunction:: get_all_molecular_profiles + +Assertions +~~~~~~~~~~ + +.. autofunction:: get_all_assertions + +Evidence Items +~~~~~~~~~~~~~~ + +.. autofunction:: get_all_evidence + +Variant Groups +~~~~~~~~~~~~~~ + +.. autofunction:: get_all_variant_groups + +Sources +~~~~~~~ + +.. autofunction:: get_all_sources + +Diseases +~~~~~~~~ + +.. autofunction:: get_all_diseases + +Therapies +~~~~~~~~~ + +.. autofunction:: get_all_therapies + +Phenotypes +~~~~~~~~~~ + +.. autofunction:: get_all_phenotypes + +By ID +----- + +Records can be obtained by CIViC ID through a collection of functions provided in the `civic` module. + +Features +~~~~~~~~ + +.. autofunction:: get_feature_by_id +.. autofunction:: get_features_by_ids + +.. autofunction:: get_gene_by_id +.. autofunction:: get_genes_by_ids + +.. autofunction:: get_factor_by_id +.. autofunction:: get_factors_by_ids + +.. autofunction:: get_fusion_by_id +..
autofunction:: get_fusions_by_ids + +Variants +~~~~~~~~ + +.. autofunction:: get_variant_by_id +.. autofunction:: get_variants_by_ids + +Molecular Profiles +~~~~~~~~~~~~~~~~~~ + +.. autofunction:: get_molecular_profile_by_id +.. autofunction:: get_molecular_profiles_by_ids + +Assertions +~~~~~~~~~~ + +.. autofunction:: get_assertion_by_id +.. autofunction:: get_assertions_by_ids + +Evidence Items +~~~~~~~~~~~~~~ + +.. autofunction:: get_evidence_by_id +.. autofunction:: get_evidence_by_ids + +Variant Groups +~~~~~~~~~~~~~~ + +.. autofunction:: get_variant_group_by_id +.. autofunction:: get_variant_groups_by_ids + +Sources +~~~~~~~ + +.. autofunction:: get_source_by_id +.. autofunction:: get_sources_by_ids + +Diseases +~~~~~~~~ + +.. autofunction:: get_disease_by_id +.. autofunction:: get_diseases_by_ids + +Therapies +~~~~~~~~~ + +.. autofunction:: get_therapy_by_id +.. autofunction:: get_therapies_by_ids + +Phenotypes +~~~~~~~~~~ + +.. autofunction:: get_phenotype_by_id +.. autofunction:: get_phenotypes_by_ids + + +By Coordinates +-------------- + +Variant records can be searched by GRCh37 coordinates. To query specific genomic coordinates, you will +need to construct a :class:`CoordinateQuery` object, and pass this query to the +:func:`search_variants_by_coordinates` function. If you wish to query multiple genomic coordinates (e.g. +a set of variants observed in a patient tumor), construct a sorted list of :class:`CoordinateQuery` objects +(sorted by `chr`, `start`, `stop`, `alt`), and pass the list to the :func:`bulk_search_variants_by_coordinates` +function. + +.. autoclass:: CoordinateQuery +.. autofunction:: search_variants_by_coordinates +.. autofunction:: bulk_search_variants_by_coordinates + +Coordinates can also be used to query :class:`Assertion` and +:class:`Evidence` records: + +.. autofunction:: search_assertions_by_coordinates +.. autofunction:: search_evidence_by_coordinates + +By Other Attribute +------------------ + +Genes +~~~~~ + +.. 
autofunction:: get_gene_by_entrez_id +.. autofunction:: get_gene_by_name + +Factors +~~~~~~~ + +.. autofunction:: get_factor_by_ncit_id +.. autofunction:: get_factor_by_name + +Fusions +~~~~~~~ + +.. autofunction:: get_fusion_by_name +.. autofunction:: search_fusions_by_partner_gene_id + +Variants +~~~~~~~~ + +.. autofunction:: search_variants_by_allele_registry_id +.. autofunction:: search_variants_by_hgvs +.. autofunction:: search_variants_by_name + +Sources +~~~~~~~ + +.. autofunction:: get_pubmed_source_by_id +.. autofunction:: get_ash_source_by_doi +.. autofunction:: get_asco_source_by_id + +Diseases +~~~~~~~~ + +.. autofunction:: get_disease_by_doid +.. autofunction:: get_disease_by_name + +Therapies +~~~~~~~~~ + +.. autofunction:: get_therapy_by_ncit_id +.. autofunction:: get_therapy_by_name + +Phenotypes +~~~~~~~~~~ + +.. autofunction:: get_phenotype_by_hpo_id +.. autofunction:: get_phenotype_by_name diff --git a/docs/index.rst b/docs/index.rst index 6e4140d..d6b51f0 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -23,18 +23,18 @@ If you find CIViCpy useful for your work, **please cite** our `manuscript >> my_variant_ids = [12, 306, 79] >>> my_variants = civic.get_variants_by_ids(my_variant_ids) >>> my_variants - [, , ] + [, , ] >>> [(v.gene.name, v.name) for v in my_variants] [('BRAF', 'V600E'), ('KRAS', 'G12D'), ('ERBB2', 'AMPLIFICATION')] - >>> braf_id = my_variants[0].gene_id # or my_variants[0].gene.id + >>> braf_id = my_variants[0].feature_id # or my_variants[0].gene.id or my_variants[0].feature.id >>> braf_variants = civic.get_gene_by_id(braf_id).variants >>> len(braf_variants) - 67 + 95 >>> set(my_variants) & set(braf_variants) - {} + {} **CIViCpy** lets you pull data from CIViC using the :mod:`civic` module and interact with records as dynamic objects. With the aid of caching, it is easy to explore relationships between CIViC records (e.g. assertions, genes, variants) @@ -61,7 +61,8 @@ knowledgebase. 
intro install civic + getting_records exports cli -.. _`CIViC knowledgebase`: https://civicdb.org \ No newline at end of file +.. _`CIViC knowledgebase`: https://civicdb.org