From c76f999033453d5b8a23f55b221e90bcd5288063 Mon Sep 17 00:00:00 2001 From: Karl Dyrhage Date: Tue, 12 Nov 2024 13:07:10 +0100 Subject: [PATCH] Updated docs for v0.4 --- docs/make.jl | 1 + docs/src/index.md | 20 ++++++++++++++++---- docs/src/loci.md | 23 +++++++++++++++++++++++ src/record.jl | 5 +++-- 4 files changed, 43 insertions(+), 6 deletions(-) create mode 100644 docs/src/loci.md diff --git a/docs/make.jl b/docs/make.jl index 1c3a533..686a339 100644 --- a/docs/make.jl +++ b/docs/make.jl @@ -5,6 +5,7 @@ makedocs(sitename = "GenomicAnnotations.jl", authors = "Karl Dyrhage", "index.md", "I/O" => "io.md", "Accessing and modifying annotations" => "accessing.md", + "Representing genomic loci" => "loci.md", "Filtering: the @genes macro" => "genes.md", "Examples" => "examples.md" ], diff --git a/docs/src/index.md b/docs/src/index.md index 3a0eb25..b7d45b4 100644 --- a/docs/src/index.md +++ b/docs/src/index.md @@ -34,19 +34,31 @@ end chr.genes[2].locus_tag = "test123" ``` -The `Locus` of a `Gene` retrieved with `locus(gene)`. The `Locus` itself is immutable, but can be updated with `locus!(gene, newlocus)`. For simplicity, `position(gene)` is shorthand for `locus(gene).position`. +The locus of a `Gene` is represented by an `AbstractLocus` (see [Loci](@ref)), which can be retrieved with `locus(gene)`. The locus of a gene can be updated with `locus!(gene, newlocus)`. The easiest way to create a locus is to use the constructor `Locus(s)`, which takes an `AbstractString` `s` and parses it as a GenBank locus string as defined here: https://www.insdc.org/submitting-standards/feature-table/#3.4. Note that remote entry descriptors have not been implemented. 
```julia -# Create a new Locus, copying all fields of the old one but shifting the position by 1 -oldloc = locus(gene) -locus!(gene, Locus(oldloc.position .+ 1, oldloc.strand, oldloc.complete_left, oldloc.complete_right, oldloc.order, oldloc.join)) +# Creating a new locus +newlocus = Locus("complement(join(1..100,200..>300))") + +# Assigning a new locus to a gene +locus!(gene, newlocus) +# which is equivalent to +locus!(gene, "complement(join(1..100,200..>300))") # Access the genomic positions of all genes position.(chr.genes) ``` +For simplicity, `position(gene)` is shorthand for `locus(gene).position`. `locus(gene).position` gives an iterable object that generates each individual position in the defined order. Thus: +```julia +loc = Locus("join(4..6,1..3)") +collect(loc.position) # Returns [4,5,6,1,2,3] +``` + The macro `@genes` can be used to filter through the annotations (see [`@genes`](@ref)). The keyword `gene` is used to refer to the individual `Gene`s. `@genes` can also be used to modify annotations. ```julia @genes(chr, length(gene) > 300) # Returns all features longer than 300 nt + +@genes(chr, CDS, ismissing(:product)) .= "hypothetical product" ``` Gene sequences can be accessed with `sequence(gene)`. For example, the following code will write the translated sequences of all protein-coding genes in `chr` to a file: diff --git a/docs/src/loci.md b/docs/src/loci.md new file mode 100644 index 0000000..f1f98d0 --- /dev/null +++ b/docs/src/loci.md @@ -0,0 +1,23 @@ +# [Representing genomic loci](@id Loci) + +The easiest way to create a locus is to use the constructor `Locus(s)`, which takes an `AbstractString` `s` and parses it as a GenBank locus string as defined here: https://www.insdc.org/submitting-standards/feature-table/#3.4. Note that remote entry descriptors have not been implemented. + +## Internal representation +Since v0.4.0, genomic loci are represented using instances of `AbstractLocus`. 
Simple descriptors are represented with `PointLocus{T}` and `SpanLocus{T}`, where `T` is an `AbstractDescriptor`: + +| GenBank string | GenomicAnnotations representation | Description | +| --- | --- | --- | +| 1 | `PointLocus{SingleNucleotide}(1)` | Refers to a single nucleotide. | +| 1^2 | `PointLocus{BetweenNucleotides}(1)` | Refers to the internucleotide space immediately after position 1. | +| 10..20 | `SpanLocus{ClosedSpan}(10:20)` | Denotes a closed sequence span. | +| 10..>20 | `SpanLocus{OpenRightSpan}(10:20)` | Denotes a sequence span where the right side is open, i.e. the end-point is undefined but is no earlier than position 20. | +| <10..20 | `SpanLocus{OpenLeftSpan}(10:20)` | The left end-point is undefined. | +| <10..>20 | `SpanLocus{OpenSpan}(10:20)` | Both end-points are undefined. | + +These can be wrapped in `Complement` for loci on the complement strand, e.g. `Complement(SpanLocus{ClosedSpan}(10:20))` representing "complement(10..20)". Simplified constructors are provided for all `AbstractDescriptor`s, e.g. `ClosedSpan(1:10) == SpanLocus(1:10, ClosedSpan)`. + +Compound loci are represented with `Join` and `Order`. Both types have a single field, `loc`, which contains any number of simple descriptors. They can be wrapped with `Complement`, as can the individual elements in `loc`. + +```julia +Locus("complement(join(10..20,30..>40))") isa Complement{Join{SpanLocus{ClosedSpan}, SpanLocus{OpenRightSpan}}} +``` \ No newline at end of file diff --git a/src/record.jl b/src/record.jl index b87b516..8ddaaac 100644 --- a/src/record.jl +++ b/src/record.jl @@ -337,9 +337,10 @@ iscomplement(loc::AbstractLocus) = false """ - iscomplete(gene) + iscomplete(gene::AbstractGene) + iscomplete(loc::AbstractLocus) -Return `true` if `gene` is a complete gene, i.e. not a pseudo gene or partial. +Return `true` if `gene` is a complete gene, i.e. not a pseudo gene or partial. Compound loci (i.e. those with join/order) are considered incomplete if any element is incomplete. 
""" iscomplete(gene::AbstractGene) = iscomplete(locus(gene)) && !any(get(gene, :pseudo, false)) && !any(get(gene, :partial, false)) iscomplete(locus::SpanLocus{ClosedSpan}) = true