15
15
from pathlib import Path
16
16
from typing import Any , Literal
17
17
18
+ import numpy as np
18
19
import xarray as xr
19
20
from pygmt .encodings import charset
20
21
from pygmt .exceptions import GMTInvalidInput
39
40
"ISO-8859-15" ,
40
41
"ISO-8859-16" ,
41
42
]
43
+ # Type alias for the possible data kinds.
44
+ Kind = Literal [
45
+ "arg" , "empty" , "file" , "geojson" , "grid" , "image" , "matrix" , "stringio" , "vectors"
46
+ ]
42
47
43
48
44
- def _validate_data_input ( # noqa: PLR0912
45
- data = None , x = None , y = None , z = None , required_data = True , required_cols = 2 , kind = None
46
- ):
49
+ def _validate_data_input (
50
+ data = None ,
51
+ x = None ,
52
+ y = None ,
53
+ z = None ,
54
+ required_data : bool = True ,
55
+ required_cols : int = 2 ,
56
+ kind : Kind | None = None ,
57
+ ) -> None :
47
58
"""
48
59
Check if the combination of data/x/y/z is valid.
49
60
@@ -76,23 +87,23 @@ def _validate_data_input( # noqa: PLR0912
76
87
>>> _validate_data_input(data=data, required_cols=3, kind="matrix")
77
88
Traceback (most recent call last):
78
89
...
79
- pygmt.exceptions.GMTInvalidInput: data needs 3 columns but 2 column(s) are given.
90
+ pygmt.exceptions.GMTInvalidInput: Need at least 3 columns but 2 column(s) are given.
80
91
>>> _validate_data_input(
81
92
... data=pd.DataFrame(data, columns=["x", "y"]),
82
93
... required_cols=3,
83
94
... kind="vectors",
84
95
... )
85
96
Traceback (most recent call last):
86
97
...
87
- pygmt.exceptions.GMTInvalidInput: data needs 3 columns but 2 column(s) are given.
98
+ pygmt.exceptions.GMTInvalidInput: Need at least 3 columns but 2 column(s) are given.
88
99
>>> _validate_data_input(
89
100
... data=xr.Dataset(pd.DataFrame(data, columns=["x", "y"])),
90
101
... kind="vectors",
91
102
... required_cols=3,
92
103
... )
93
104
Traceback (most recent call last):
94
105
...
95
- pygmt.exceptions.GMTInvalidInput: data needs 3 columns but 2 column(s) are given.
106
+ pygmt.exceptions.GMTInvalidInput: Need at least 3 columns but 2 column(s) are given.
96
107
>>> _validate_data_input(data="infile", x=[1, 2, 3])
97
108
Traceback (most recent call last):
98
109
...
@@ -115,42 +126,49 @@ def _validate_data_input( # noqa: PLR0912
115
126
GMTInvalidInput
116
127
If the data input is not valid.
117
128
"""
118
- if kind is None :
119
- kind = data_kind (data , required = required_data )
120
-
129
+ # Check if too much data is provided.
121
130
if data is not None and any (v is not None for v in (x , y , z )):
122
131
msg = "Too much data. Use either data or x/y/z."
123
132
raise GMTInvalidInput (msg )
124
133
134
+ # Determine the data kind if not provided.
135
+ kind = kind or data_kind (data , required = required_data )
136
+
137
+ # Check based on the data kind.
125
138
match kind :
126
- case "empty" :
127
- if x is None and y is None : # Both x and y are None.
139
+ case "empty" : # data is given via a series of vectors like x/y/z.
140
+ if x is None and y is None :
128
141
msg = "No input data provided."
129
142
raise GMTInvalidInput (msg )
130
- if x is None or y is None : # Either x or y is None.
143
+ if x is None or y is None :
131
144
msg = "Must provide both x and y."
132
145
raise GMTInvalidInput (msg )
133
146
if required_cols >= 3 and z is None :
134
- # Both x and y are not None, now check z.
135
147
msg = "Must provide x, y, and z."
136
148
raise GMTInvalidInput (msg )
137
149
case "matrix" : # 2-D numpy.ndarray
138
150
if (actual_cols := data .shape [1 ]) < required_cols :
139
- msg = f"data needs { required_cols } columns but { actual_cols } column(s) are given."
151
+ msg = (
152
+ f"Need at least { required_cols } columns but { actual_cols } column(s) "
153
+ "are given."
154
+ )
140
155
raise GMTInvalidInput (msg )
141
156
case "vectors" :
157
+ # The if-else block should match the code in the virtualfile_in function.
142
158
if hasattr (data , "items" ) and not hasattr (data , "to_frame" ):
143
- # Dict, pd.DataFrame, xr.Dataset
144
- arrays = [array for _ , array in data .items ()]
145
- if (actual_cols := len (arrays )) < required_cols :
146
- msg = f"data needs { required_cols } columns but { actual_cols } column(s) are given."
147
- raise GMTInvalidInput (msg )
148
-
149
- # Loop over columns to make sure they're not None
150
- for idx , array in enumerate (arrays [:required_cols ]):
151
- if array is None :
152
- msg = f"data needs { required_cols } columns but the { idx } column is None."
153
- raise GMTInvalidInput (msg )
159
+ # Dict, pandas.DataFrame, or xarray.Dataset, but not pandas.Series.
160
+ _data = [array for _ , array in data .items ()]
161
+ else :
162
+ # Python list, tuple, numpy.ndarray, and pandas.Series types
163
+ _data = np .atleast_2d (np .asanyarray (data ).T )
164
+
165
+ # Check if the number of columns is sufficient.
166
+ if (actual_cols := len (_data )) < required_cols :
167
+ msg = (
168
+ f"Need at least { required_cols } columns but { actual_cols } "
169
+ "column(s) are given."
170
+ )
171
+ raise GMTInvalidInput (msg )
154
172
155
173
156
174
def _is_printable_ascii (argstr : str ) -> bool :
@@ -269,11 +287,7 @@ def _check_encoding(argstr: str) -> Encoding:
269
287
return "ISOLatin1+"
270
288
271
289
272
- def data_kind (
273
- data : Any , required : bool = True
274
- ) -> Literal [
275
- "arg" , "empty" , "file" , "geojson" , "grid" , "image" , "matrix" , "stringio" , "vectors"
276
- ]:
290
+ def data_kind (data : Any , required : bool = True ) -> Kind :
277
291
r"""
278
292
Check the kind of data that is provided to a module.
279
293
0 commit comments