-
Notifications
You must be signed in to change notification settings - Fork 228
WIP: Add return_table helper function #1336
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Changes from all commits
e668ec1
6d8b2e0
10f8238
3546ef6
8e59e57
88832db
3a98346
c8cef5e
46f2cd4
b2a388f
d7084e5
935bdaa
eae2eef
c65fb0e
61f6126
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -15,4 +15,5 @@ | |
dummy_context, | ||
is_nonstr_iter, | ||
launch_external_viewer, | ||
return_table, | ||
) |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,9 @@ | |
from collections.abc import Iterable | ||
from contextlib import contextmanager | ||
|
||
import geopandas as gpd | ||
import numpy as np | ||
import pandas as pd | ||
import xarray as xr | ||
from pygmt.exceptions import GMTInvalidInput | ||
|
||
|
@@ -267,3 +270,55 @@ def args_in_kwargs(args, kwargs): | |
If one of the required arguments is in ``kwargs``. | ||
""" | ||
return any(arg in kwargs for arg in args) | ||
|
||
|
||
def return_table(result, data_format, format_parameter, df_columns): | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Looks promising! A few comments:
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Sounds good; I'll have to get smarter on the last two but I don't see why it should be a problem.
I like the idea of keeping it short, especially when there is a default option (I anticipate it being a numpy array) and the strings are not also the same word as Python modules or variable types. But I understand how the single letters could be confusing.
Since this is a helper function, I envisioned that the argument for There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
Added in c8cef5e There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
I'll have to agree with Meghan that long descriptive names like |
||
r""" | ||
Take the table output from the GMT API and return it as either a string, | ||
array, or DataFrame. | ||
|
||
Parameters | ||
---------- | ||
result : str | ||
The table returned from the GMT API as a string. | ||
data_format : str | ||
A single-letter string that specifies requested data format of the | ||
table. | ||
**a** : numpy array | ||
**d** : pandas DataFrame | ||
**s** : string | ||
**x** : xarray DataArray | ||
format_parameter : str | ||
The name of the parameter used to specify the data format in the | ||
pygmt function. This name is used when raising the GMTInvalidInput | ||
error to ensure module-specific parameters are consistent with the | ||
error raised. | ||
df_columns : list | ||
The column names of the returned pandas DataFrame. | ||
""" | ||
|
||
if data_format == "s": | ||
return result | ||
data_list = [] | ||
for string_entry in result.strip().split("\n"): | ||
float_entry = [] | ||
string_list = string_entry.strip().split() | ||
for i in string_list: | ||
try: | ||
float_entry.append(float(i)) | ||
except ValueError: | ||
continue | ||
if float_entry != []: | ||
data_list.append(float_entry) | ||
data_array = np.array(data_list) | ||
if data_format == "a": | ||
result = data_array | ||
elif data_format == "x": | ||
result = xr.DataArray(data_array) | ||
elif data_format == "d": | ||
result = pd.DataFrame(data_array, columns=df_columns) | ||
else: | ||
raise GMTInvalidInput( | ||
f"""Must specify {format_parameter} as either a, d, s, or x.""" | ||
) | ||
return result |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -10,6 +10,7 @@ | |
data_kind, | ||
fmt_docstring, | ||
kwargs_to_strings, | ||
return_table, | ||
use_alias, | ||
) | ||
|
||
|
@@ -33,7 +34,14 @@ | |
n="interpolation", | ||
) | ||
@kwargs_to_strings(R="sequence", S="sequence") | ||
def grdtrack(points, grid, newcolname=None, outfile=None, **kwargs): | ||
def grdtrack( | ||
points, | ||
grid, | ||
data_format="d", | ||
df_columns=["longitude", "latitude", "z-value"], | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Not sure if it's a good idea to have default column names, especially for the x (longitude) and y (latitude) columns since someone passing in a |
||
outfile=None, | ||
**kwargs | ||
): | ||
r""" | ||
Sample grids at specified (x,y) locations. | ||
|
||
|
@@ -248,9 +256,6 @@ def grdtrack(points, grid, newcolname=None, outfile=None, **kwargs): | |
- None if ``outfile`` is set (track output will be stored in file set | ||
by ``outfile``) | ||
""" | ||
if data_kind(points) == "matrix" and newcolname is None: | ||
raise GMTInvalidInput("Please pass in a str to 'newcolname'") | ||
|
||
with GMTTempFile(suffix=".csv") as tmpfile: | ||
with Session() as lib: | ||
# Choose how data will be passed into the module | ||
|
@@ -272,11 +277,13 @@ def grdtrack(points, grid, newcolname=None, outfile=None, **kwargs): | |
|
||
# Read temporary csv output to a pandas table | ||
if outfile == tmpfile.name: # if user did not set outfile, return pd.DataFrame | ||
try: | ||
column_names = points.columns.to_list() + [newcolname] | ||
result = pd.read_csv(tmpfile.name, sep="\t", names=column_names) | ||
except AttributeError: # 'str' object has no attribute 'columns' | ||
result = pd.read_csv(tmpfile.name, sep="\t", header=None, comment=">") | ||
result_data = tmpfile.read() | ||
result = return_table( | ||
result=result_data, | ||
data_format=data_format, | ||
format_parameter="data_format", | ||
df_columns=df_columns, | ||
) | ||
elif outfile != tmpfile.name: # return None if outfile set, output in outfile | ||
result = None | ||
|
||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
This `import geopandas` line shouldn't be here at the top-level. I'd suggest importing geopandas in the `return_table` function itself if you need it, and only under `elif data_format=="geopandas"`.