
Commit 324c534

#415 Add data source writer skeleton.
1 parent 77dc2fd

File tree

2 files changed, +79 -2 lines changed

spark-cobol/src/main/scala/za/co/absa/cobrix/spark/cobol/source/DefaultSource.scala

Lines changed: 34 additions & 2 deletions
@@ -17,9 +17,9 @@
 package za.co.absa.cobrix.spark.cobol.source
 
 import org.apache.hadoop.fs.Path
-import org.apache.spark.sql.sources.{BaseRelation, DataSourceRegister, RelationProvider, SchemaRelationProvider}
+import org.apache.spark.sql.sources._
 import org.apache.spark.sql.types.StructType
-import org.apache.spark.sql.{SQLContext, SparkSession}
+import org.apache.spark.sql.{DataFrame, SQLContext, SaveMode, SparkSession}
 import za.co.absa.cobrix.cobol.internal.Logging
 import za.co.absa.cobrix.cobol.reader.parameters.CobolParameters
 import za.co.absa.cobrix.spark.cobol.parameters.CobolParametersParser._
@@ -35,6 +35,7 @@ import za.co.absa.cobrix.spark.cobol.utils.{BuildProperties, SparkUtils}
 class DefaultSource
   extends RelationProvider
     with SchemaRelationProvider
+    with CreatableRelationProvider
     with DataSourceRegister
     with ReaderFactory
     with Logging {
@@ -45,6 +46,7 @@ class DefaultSource
     createRelation(sqlContext, parameters, null)
   }
 
+  /** Reader relation */
   override def createRelation(sqlContext: SQLContext, parameters: Map[String, String], schema: StructType): BaseRelation = {
     CobolParametersValidator.validateOrThrow(parameters, sqlContext.sparkSession.sparkContext.hadoopConfiguration)
 
@@ -59,6 +61,36 @@ class DefaultSource
       cobolParameters.debugIgnoreFileSize)(sqlContext)
   }
 
+  /** Writer relation */
+  override def createRelation(sqlContext: SQLContext, mode: SaveMode, parameters: Map[String, String], data: DataFrame): BaseRelation = {
+    val path = parameters.getOrElse("path",
+      throw new IllegalArgumentException("Path is required for this data source."))
+
+    mode match {
+      case SaveMode.Overwrite =>
+        val outputPath = new Path(path)
+        val hadoopConf = sqlContext.sparkContext.hadoopConfiguration
+        val fs = outputPath.getFileSystem(hadoopConf)
+        if (fs.exists(outputPath)) {
+          fs.delete(outputPath, true)
+        }
+      case SaveMode.Append =>
+      case _ =>
+    }
+
+    // Simply save each row as comma-separated values in a text file
+    data.rdd
+      .map(row => row.mkString(","))
+      .saveAsTextFile(path)
+
+    new BaseRelation {
+      override def sqlContext: SQLContext = sqlContext
+
+      override def schema: StructType = data.schema
+    }
+  }
+
   //TODO fix with the correct implementation once the correct Reader hierarchy is put in place.
   override def buildReader(spark: SparkSession, parameters: Map[String, String]): FixedLenReader = null

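With CreatableRelationProvider mixed in, Spark's DataFrameWriter can dispatch to this source through its registered short name. A minimal usage sketch (the output path and the df variable are illustrative, not from the commit):

    import org.apache.spark.sql.SaveMode

    // Spark resolves "cobol" via DataSourceRegister and calls the
    // writer createRelation above with the chosen SaveMode.
    df.write
      .format("cobol")
      .mode(SaveMode.Overwrite) // takes the delete-then-write branch
      .save("/tmp/cobol-writer-out")

One caveat in the skeleton as committed: inside the anonymous BaseRelation, the sqlContext member shadows the method parameter of the same name, so the override refers to itself and would recurse if ever called. A sketch of a fix, capturing the outer reference under a hypothetical name first:

    // Sketch: capture the parameter before the member shadows it.
    val outerSqlContext = sqlContext
    new BaseRelation {
      override def sqlContext: SQLContext = outerSqlContext
      override def schema: StructType = data.schema
    }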
spark-cobol/src/test/scala/za/co/absa/cobrix/spark/cobol/source/WriterSourceSpec.scala

Lines changed: 45 additions & 0 deletions
@@ -0,0 +1,45 @@
+/*
+ * Copyright 2018 ABSA Group Limited
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package za.co.absa.cobrix.spark.cobol.source
+
+import org.apache.hadoop.fs.Path
+import org.scalatest.wordspec.AnyWordSpec
+import za.co.absa.cobrix.spark.cobol.source.base.SparkTestBase
+import za.co.absa.cobrix.spark.cobol.source.fixtures.BinaryFileFixture
+import za.co.absa.cobrix.spark.cobol.utils.FileUtils
+
+class WriterSourceSpec extends AnyWordSpec with SparkTestBase with BinaryFileFixture {
+
+  import spark.implicits._
+
+  "writer" should {
+    "be able to write a basic dataframe" in {
+      withTempDirectory("writer") { tempDir =>
+        val df = List(("A", 1), ("B", 2), ("C", 3)).toDF("a", "b")
+
+        val outputPath = new Path(tempDir, "test")
+
+        df.write.format("cobol").save(outputPath.toString)
+
+        val files = FileUtils.getFiles(outputPath.toString, spark.sparkContext.hadoopConfiguration)
+
+        assert(files.nonEmpty)
+      }
+    }
+  }
+}
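Since the writer serializes each row as comma-joined text via saveAsTextFile, the test's output can also be read back with Spark's plain text reader. A follow-up sketch using the test's own names (not part of the commit):

    // Hypothetical extra assertion: read back what the skeleton wrote.
    // Each line should be a comma-joined row, e.g. "A,1".
    val written = spark.read.text(outputPath.toString)
    assert(written.count() == 3)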
