@@ -192,88 +192,87 @@ class Test26FixLengthWithIdGeneration extends AnyWordSpec with SparkTestBase wit
         assertEqualsMultiline(actual, expected)
       }
     }
-  }
 
-  "correctly work with segment id generation option with length field" in {
-    withTempBinFile("fix_length_reg3", ".dat", binFileContentsLengthField) { tmpFileName =>
-      val df = spark
-        .read
-        .format("cobol")
-        .option("copybook_contents", copybook)
-        .option("record_format", "F")
-        .option("record_length_field", "LEN")
-        .option("strict_integral_precision", "true")
-        .option("segment_field", "IND")
-        .option("segment_id_prefix", "ID")
-        .option("segment_id_level0", "A")
-        .option("segment_id_level1", "_")
-        .option("redefine-segment-id-map:0", "SEGMENT1 => A")
-        .option("redefine-segment-id-map:1", "SEGMENT2 => B")
-        .option("redefine-segment-id-map:2", "SEGMENT3 => C")
-        .option("input_split_records", 1)
-        .option("pedantic", "true")
-        .load(tmpFileName)
+    "correctly work with segment id generation option with length field and strict integral precision" in {
+      withTempBinFile("fix_length_reg3", ".dat", binFileContentsLengthField) { tmpFileName =>
+        val df = spark
+          .read
+          .format("cobol")
+          .option("copybook_contents", copybook)
+          .option("record_format", "F")
+          .option("record_length_field", "LEN")
+          .option("strict_integral_precision", "true")
+          .option("segment_field", "IND")
+          .option("segment_id_prefix", "ID")
+          .option("segment_id_level0", "A")
+          .option("segment_id_level1", "_")
+          .option("redefine-segment-id-map:0", "SEGMENT1 => A")
+          .option("redefine-segment-id-map:1", "SEGMENT2 => B")
+          .option("redefine-segment-id-map:2", "SEGMENT3 => C")
+          .option("input_split_records", 1)
+          .option("pedantic", "true")
+          .load(tmpFileName)
 
-      val actual = SparkUtils.convertDataFrameToPrettyJSON(df.drop("LEN").orderBy("Seg_Id0", "Seg_Id1"))
+        val actual = SparkUtils.convertDataFrameToPrettyJSON(df.drop("LEN").orderBy("Seg_Id0", "Seg_Id1"))
 
-      assertEqualsMultiline(actual, expected)
+        assertEqualsMultiline(actual, expected)
+      }
     }
-  }
 
-  "work with string values" in {
-    val copybook =
-      """      01  R.
+    "correctly work when the length field has the string type" in {
+      val copybook =
+        """      01  R.
                  05  LEN      PIC X(1).
                  05  FIELD1   PIC X(1).
         """
 
-    val binFileContentsLengthField: Array[Byte] = Array[Byte](
-      // A1
-      0xF2.toByte, 0xF3.toByte, 0xF3.toByte, 0xF4.toByte
-    ).map(_.toByte)
+      val binFileContentsLengthField: Array[Byte] = Array[Byte](
+        // A1
+        0xF2.toByte, 0xF3.toByte, 0xF3.toByte, 0xF4.toByte
+      ).map(_.toByte)
 
-    withTempBinFile("fix_length_str", ".dat", binFileContentsLengthField) { tmpFileName =>
-      val df = spark
-        .read
-        .format("cobol")
-        .option("copybook_contents", copybook)
-        .option("record_format", "F")
-        .option("record_length_field", "LEN")
-        .option("pedantic", "true")
-        .load(tmpFileName)
+      withTempBinFile("fix_length_str", ".dat", binFileContentsLengthField) { tmpFileName =>
+        val df = spark
+          .read
+          .format("cobol")
+          .option("copybook_contents", copybook)
+          .option("record_format", "F")
+          .option("record_length_field", "LEN")
+          .option("pedantic", "true")
+          .load(tmpFileName)
 
-      assert(df.count() == 2)
+        assert(df.count() == 2)
+      }
     }
-  }
 
-  "fail for incorrect string values" in {
-    val copybook =
-      """      01  R.
+    "fail when the length field has the string type and incorrect string values are encountered" in {
+      val copybook =
+        """      01  R.
                  05  LEN      PIC X(1).
                  05  FIELD1   PIC X(1).
         """
 
-    val binFileContentsLengthField: Array[Byte] = Array[Byte](
-      // A1
-      0xF2.toByte, 0xF3.toByte, 0xC3.toByte, 0xF4.toByte
-    ).map(_.toByte)
+      val binFileContentsLengthField: Array[Byte] = Array[Byte](
+        // A1
+        0xF2.toByte, 0xF3.toByte, 0xC3.toByte, 0xF4.toByte
+      ).map(_.toByte)
 
-    withTempBinFile("fix_length_str", ".dat", binFileContentsLengthField) { tmpFileName =>
-      val df = spark
-        .read
-        .format("cobol")
-        .option("copybook_contents", copybook)
-        .option("record_format", "F")
-        .option("record_length_field", "LEN")
-        .option("pedantic", "true")
-        .load(tmpFileName)
+      withTempBinFile("fix_length_str", ".dat", binFileContentsLengthField) { tmpFileName =>
+        val df = spark
+          .read
+          .format("cobol")
+          .option("copybook_contents", copybook)
+          .option("record_format", "F")
+          .option("record_length_field", "LEN")
+          .option("pedantic", "true")
+          .load(tmpFileName)
 
-      val ex = intercept[SparkException] {
-        df.count()
-      }
+        val ex = intercept[SparkException] {
+          df.count()
+        }
 
-      assert(ex.getCause.getMessage.contains("Record length value of the field LEN must be an integral type, encountered: 'C'"))
+        assert(ex.getCause.getMessage.contains("Record length value of the field LEN must be an integral type, encountered: 'C'"))
+      }
     }
   }
-
 }
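
For context, a minimal sketch of how the reader options exercised by these tests are typically combined, outside the diff above. Only options that appear in the tests are used; the input path, the local SparkSession setup, and the object name are assumptions for illustration, not part of the commit.

// Illustrative sketch (not part of the commit): read a fixed-format file whose
// record length is taken from the LEN field, using the spark-cobol (Cobrix) source.
import org.apache.spark.sql.SparkSession

object RecordLengthFieldSketch {
  def main(args: Array[String]): Unit = {
    val spark = SparkSession.builder()
      .appName("record-length-field-sketch")
      .master("local[*]")                       // assumed local setup for the sketch
      .getOrCreate()

    // Same copybook as in the tests: a 1-byte length field followed by a 1-byte payload.
    val copybook =
      """      01  R.
        |        05  LEN     PIC X(1).
        |        05  FIELD1  PIC X(1).
        |""".stripMargin

    val df = spark.read
      .format("cobol")
      .option("copybook_contents", copybook)    // copybook passed inline, as in the tests
      .option("record_format", "F")             // fixed-length record format
      .option("record_length_field", "LEN")     // record length is read from the LEN field
      .option("pedantic", "true")               // fail fast on unrecognized options
      .load("/path/to/fixed_length_data.dat")   // hypothetical input path

    df.show(false)
  }
}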