Skip to content

Add write gtf feature - Issue #21 #46

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions gtfparse/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@
from .attribute_parsing import expand_attribute_strings
from .create_missing_features import create_missing_features
from .parsing_error import ParsingError
from .write_gtf import write_gtf
from .read_gtf import (
read_gtf,
parse_gtf,
Expand Down
23 changes: 23 additions & 0 deletions gtfparse/write_gtf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,23 @@
import polars
from pathlib import Path
import typing as t


# Names of the eight fixed, tab-separated GTF columns, in output order.
# Any other column of a row is serialized as a key/value attribute instead.
COMMONS_COL = [
    'seqname',
    'source',
    'feature',
    'start',
    'end',
    'score',
    'strand',
    'frame',
]


def write_gtf(
    df: polars.DataFrame,
    export_path: t.Union[str, Path],
    headers: t.Optional[t.List[str]] = None,
):
    """Write ``df`` to ``export_path`` as a GTF-style, tab-separated text file.

    Each row of ``df`` becomes one line: the fixed columns produced by
    ``commons_cols`` followed by a tab and the attribute string produced by
    ``custom_fields``.

    Args:
        df: Data frame to serialize; rows are read with
            ``iter_rows(named=True)``.
        export_path: Destination file. An existing file is overwritten.
        headers: Optional lines written verbatim (plus a newline) before the
            records. No comment prefix is added, so callers must include any
            leading ``#`` themselves.
    """
    # Pin the encoding so the output does not depend on the platform locale.
    with open(export_path, 'w', encoding='utf-8') as f:
        for header in headers or []:
            f.write(f"{header}\n")
        # The generator is consumed lazily, so rows are streamed to the file
        # without building the whole text in memory.
        f.writelines(
            f"{commons_cols(row)}\t{custom_fields(row)}\n"
            for row in df.iter_rows(named=True)
        )


def commons_cols(row) -> str:
    """Return the fixed GTF columns of ``row`` joined by tabs.

    ``row`` is a mapping from column name to value that contains every name
    listed in ``COMMONS_COL``. A value that is ``None`` or an empty string is
    written as ``.``; every other value is written with ``str()``.
    """
    cells = []
    for field in COMMONS_COL:
        value = row[field]
        # Test for "missing" explicitly: a plain truthiness check would also
        # replace legitimate falsy values such as a frame or score of 0.
        is_missing = value is None or value == ''
        cells.append('.' if is_missing else str(value))
    return "\t".join(cells)


def custom_fields(row) -> str:
    """Return the attribute column for ``row`` as ``key "value";`` pairs.

    Every entry of ``row`` whose key is not listed in ``COMMONS_COL`` is
    emitted as ``key "value";`` and the pairs are separated by one space.
    Entries whose value is ``None`` or an empty string are skipped. Returns
    an empty string when no entry qualifies.
    """
    return " ".join(
        # Each attribute, including the last one, carries its own terminating
        # semicolon, as GTF attribute lists are conventionally written.
        f'{field} "{value}";'
        for field, value in row.items()
        # Skip only missing values; falsy but real values such as 0 are kept.
        if field not in COMMONS_COL and value is not None and value != ''
    )
14 changes: 14 additions & 0 deletions tests/test_write_gtf.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
from gtfparse import read_gtf, write_gtf
from .data import data_path
from polars import DataFrame

REFSEQ_GTF_PATH = data_path("refseq.ucsc.small.gtf")


def test_write_gtf(tmp_path):
    """Round-trip a GTF through ``write_gtf`` and ``read_gtf``.

    Reads the reference file, writes it to a temporary path, reads the copy
    back and checks that it has the same columns and the same row content.
    """
    export_path = tmp_path / "dummy_gtf.gtf"
    expected_gtf = read_gtf(REFSEQ_GTF_PATH)
    write_gtf(expected_gtf, export_path)
    created_gtf = read_gtf(str(export_path))
    assert isinstance(created_gtf, DataFrame)
    # `expected_gtf == created_gtf` is an element-wise comparison that yields
    # a DataFrame rather than a bool, so asserting on it does not check
    # equality. Compare column order and plain-Python row content instead.
    assert created_gtf.columns == expected_gtf.columns
    assert created_gtf.to_dicts() == expected_gtf.to_dicts()