diff --git a/0492aa7e.8d568795.js b/0492aa7e.c0eeb818.js similarity index 98% rename from 0492aa7e.8d568795.js rename to 0492aa7e.c0eeb818.js index 01484116c..8c1a96c3f 100644 --- a/0492aa7e.8d568795.js +++ b/0492aa7e.c0eeb818.js @@ -1 +1 @@ -(window.webpackJsonp=window.webpackJsonp||[]).push([[6],{119:function(e,a,n){"use strict";n.d(a,"a",(function(){return b})),n.d(a,"b",(function(){return u}));var t=n(0),r=n.n(t);function s(e,a,n){return a in e?Object.defineProperty(e,a,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[a]=n,e}function o(e,a){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var t=Object.getOwnPropertySymbols(e);a&&(t=t.filter((function(a){return Object.getOwnPropertyDescriptor(e,a).enumerable}))),n.push.apply(n,t)}return n}function l(e){for(var a=1;a=0||(r[n]=e[n]);return r}(e,a);if(Object.getOwnPropertySymbols){var s=Object.getOwnPropertySymbols(e);for(t=0;t=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var i=r.a.createContext({}),c=function(e){var a=r.a.useContext(i),n=a;return e&&(n="function"==typeof e?e(a):l(l({},a),e)),n},b=function(e){var a=c(e.components);return r.a.createElement(i.Provider,{value:a},e.children)},d={inlineCode:"code",wrapper:function(e){var a=e.children;return r.a.createElement(r.a.Fragment,{},a)}},m=r.a.forwardRef((function(e,a){var n=e.components,t=e.mdxType,s=e.originalType,o=e.parentName,i=p(e,["components","mdxType","originalType","parentName"]),b=c(n),m=t,u=b["".concat(o,".").concat(m)]||b[m]||d[m]||s;return n?r.a.createElement(u,l(l({ref:a},i),{},{components:n})):r.a.createElement(u,l({ref:a},i))}));function u(e,a){var n=arguments,t=a&&a.mdxType;if("string"==typeof e||t){var s=n.length,o=new Array(s);o[0]=m;var l={};for(var p in a)hasOwnProperty.call(a,p)&&(l[p]=a[p]);l.originalType=e,l.mdxType="string"==typeof e?e:t,o[1]=l;for(var i=2;i<none> is not a term",id:"datasets-and-none-is-not-a-term",children:[]},{value:"Example",id:"example",children:[]}],c={toc:i};function b(e){var 
a=e.components,n=Object(r.a)(e,o);return Object(s.b)("wrapper",Object(t.a)({},c,n,{components:a,mdxType:"MDXLayout"}),Object(s.b)("h2",{id:"introduction"},"Introduction"),Object(s.b)("p",null,"By default, Spark uses reflection to derive schemas and encoders from case\nclasses. This doesn't work well when there are messages that contain types that\nSpark does not understand such as enums, ",Object(s.b)("inlineCode",{parentName:"p"},"ByteString"),"s and ",Object(s.b)("inlineCode",{parentName:"p"},"oneof"),"s. To get around this, sparksql-scalapb provides its own ",Object(s.b)("inlineCode",{parentName:"p"},"Encoder"),"s for protocol buffers."),Object(s.b)("p",null,"However, it turns out there is another obstacle. Spark does not provide any mechanism to compose user-provided encoders with its own reflection-derived Encoders. Therefore, merely providing an ",Object(s.b)("inlineCode",{parentName:"p"},"Encoder")," for protocol buffers is insufficient to derive an encoder for regular case-classes that contain a protobuf as a field. To solve this problem, ScalaPB uses ",Object(s.b)("a",{parentName:"p",href:"https://github.com/typelevel/frameless"},"frameless")," which relies on implicit search to derive encoders. This approach enables combining ScalaPB's encoders with frameless encoders that takes care for all non-protobuf types."),Object(s.b)("h2",{id:"setting-up-your-project"},"Setting up your project"),Object(s.b)("p",null,"We are going to use sbt-assembly to deploy a fat JAR containing ScalaPB, and\nyour compiled protos. 
Make sure in project/plugins.sbt you have a line\nthat adds sbt-assembly:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.10")\n')),Object(s.b)("p",null,"To add sparksql-scalapb to your project, add ",Object(s.b)("em",{parentName:"p"},"one")," of the following lines that\nmatches ",Object(s.b)("em",{parentName:"p"},"both the version of ScalaPB and Spark")," you use:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'// Spark 3.5 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql35-scalapb0_11" % "1.0.4"\n\n// Spark 3.4 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql34-scalapb0_11" % "1.0.4"\n\n// Spark 3.3 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql33-scalapb0_11" % "1.0.4"\n\n// Spark 3.2 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql32-scalapb0_11" % "1.0.4"\n\n// Spark 3.1 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql31-scalapb0_11" % "1.0.4"\n\n// Spark 3.0 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql30-scalapb0_11" % "1.0.1"\n\n// Spark 3.3 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql33-scalapb0_10" % "1.0.4"\n\n// Spark 3.2 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql32-scalapb0_10" % "1.0.4"\n\n// Spark 3.1 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql31-scalapb0_10" % "1.0.4"\n\n// Spark 3.0 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql30-scalapb0_10" % "1.0.1"\n\n// Spark 2.x and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql-scalapb" % "0.10.4"\n\n// Spark 2.x and ScalaPB 0.9\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql-scalapb" % "0.9.3"\n')),Object(s.b)("p",null,"Known issue: 
Spark 3.2.1 is binary incompatible with Spark 3.2.0 in some of its internal\nAPIs being used. If you use Spark 3.2.0, please stick to sparksql-scalapb 1.0.0-M1."),Object(s.b)("p",null,"Spark ships with an old version of Google's Protocol Buffers runtime that is not compatible with\nthe current version. In addition, it comes with incompatible versions of scala-collection-compat\nand shapeless. Therefore, we need to shade these libraries. Add the following to your build.sbt:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'assemblyShadeRules in assembly := Seq(\n ShadeRule.rename("com.google.protobuf.**" -> "shadeproto.@1").inAll,\n ShadeRule.rename("scala.collection.compat.**" -> "shadecompat.@1").inAll,\n ShadeRule.rename("shapeless.**" -> "shadeshapeless.@1").inAll\n)\n')),Object(s.b)("p",null,"See ",Object(s.b)("a",{parentName:"p",href:"https://github.com/thesamet/sparksql-scalapb-test/blob/master/build.sbt"},"complete example of build.sbt"),"."),Object(s.b)("h2",{id:"using-sparksql-scalapb"},"Using sparksql-scalapb"),Object(s.b)("p",null,"We assume you have a ",Object(s.b)("inlineCode",{parentName:"p"},"SparkSession")," assigned to the variable ",Object(s.b)("inlineCode",{parentName:"p"},"spark"),". In a standalone Scala program, this can be created with:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'import org.apache.spark.sql.SparkSession\n\nval spark: SparkSession = SparkSession\n .builder()\n .appName("ScalaPB Demo")\n .master("local[2]")\n .getOrCreate()\n// spark: SparkSession = org.apache.spark.sql.SparkSession@f13cb83\n')),Object(s.b)("p",null,Object(s.b)("em",{parentName:"p"},"IMPORTANT"),": Ensure you do not import ",Object(s.b)("inlineCode",{parentName:"p"},"spark.implicits._")," to avoid ambiguity between ScalaPB provided encoders and Spark's default encoders. 
You may want to import ",Object(s.b)("inlineCode",{parentName:"p"},"StringToColumn")," to convert ",Object(s.b)("inlineCode",{parentName:"p"},'$"col name"')," into a ",Object(s.b)("inlineCode",{parentName:"p"},"Column"),". Add an import ",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.Implicits")," to add ScalaPB's encoders for protocol buffers into the implicit search scope:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"import org.apache.spark.sql.{Dataset, DataFrame, functions => F}\nimport spark.implicits.StringToColumn\nimport scalapb.spark.ProtoSQL\n\nimport scalapb.spark.Implicits._\n")),Object(s.b)("p",null,"The code snippets below use the ",Object(s.b)("a",{parentName:"p",href:"https://github.com/scalapb/ScalaPB/blob/master/docs/src/main/protobuf/person.proto"},Object(s.b)("inlineCode",{parentName:"a"},"Person")," message"),"."),Object(s.b)("p",null,"We start by creating some test data:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'import scalapb.docs.person.Person\nimport scalapb.docs.person.Person.{Address, AddressType}\n\nval testData = Seq(\n Person(name="John", age=32, addresses=Vector(\n Address(addressType=AddressType.HOME, street="Market", city="SF"))\n ),\n Person(name="Mike", age=29, addresses=Vector(\n Address(addressType=AddressType.WORK, street="Castro", city="MV"),\n Address(addressType=AddressType.HOME, street="Church", city="MV"))\n ),\n Person(name="Bart", age=27)\n)\n')),Object(s.b)("p",null,"We can create a ",Object(s.b)("inlineCode",{parentName:"p"},"DataFrame")," from the test data:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val df = ProtoSQL.createDataFrame(spark, testData)\n// df: DataFrame = [name: string, age: int ... 
1 more field]\ndf.printSchema()\n// root\n// |-- name: string (nullable = true)\n// |-- age: integer (nullable = true)\n// |-- addresses: array (nullable = false)\n// | |-- element: struct (containsNull = false)\n// | | |-- address_type: string (nullable = true)\n// | | |-- street: string (nullable = true)\n// | | |-- city: string (nullable = true)\n// \ndf.show()\n// +----+---+--------------------+\n// |name|age| addresses|\n// +----+---+--------------------+\n// |John| 32|[{HOME, Market, SF}]|\n// |Mike| 29|[{WORK, Castro, M...|\n// |Bart| 27| []|\n// +----+---+--------------------+\n//\n")),Object(s.b)("p",null,"and then process it as any other Dataframe in Spark:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'df.select($"name", F.size($"addresses").alias("address_count")).show()\n// +----+-------------+\n// |name|address_count|\n// +----+-------------+\n// |John| 1|\n// |Mike| 2|\n// |Bart| 0|\n// +----+-------------+\n// \n\nval nameAndAddress = df.select($"name", $"addresses".getItem(0).alias("firstAddress"))\n// nameAndAddress: DataFrame = [name: string, firstAddress: struct]\n\nnameAndAddress.show()\n// +----+------------------+\n// |name| firstAddress|\n// +----+------------------+\n// |John|{HOME, Market, SF}|\n// |Mike|{WORK, Castro, MV}|\n// |Bart| null|\n// +----+------------------+\n//\n')),Object(s.b)("p",null,"Using the datasets API it is possible to bring the data back to ScalaPB case classes:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"nameAndAddress.as[(String, Option[Address])].collect().foreach(println)\n// (John,Some(Address(HOME,Market,SF,UnknownFieldSet(Map()))))\n// (Mike,Some(Address(WORK,Castro,MV,UnknownFieldSet(Map()))))\n// (Bart,None)\n")),Object(s.b)("p",null,"You can create a Dataset directly using Spark APIs:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"spark.createDataset(testData)\n// res5: 
Dataset[Person] = [name: string, age: int ... 1 more field]\n")),Object(s.b)("h2",{id:"from-binary-to-protos-and-back"},"From Binary to protos and back"),Object(s.b)("p",null,"In some situations, you may need to deal with datasets that contain serialized protocol buffers. This can be handled by mapping the datasets through ScalaPB's ",Object(s.b)("inlineCode",{parentName:"p"},"parseFrom")," and ",Object(s.b)("inlineCode",{parentName:"p"},"toByteArray")," functions."),Object(s.b)("p",null,"Let's start by preparing a dataset with test binary data by mapping our ",Object(s.b)("inlineCode",{parentName:"p"},"testData"),":"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val binaryDS: Dataset[Array[Byte]] = spark.createDataset(testData.map(_.toByteArray))\n// binaryDS: Dataset[Array[Byte]] = [value: binary]\n\nbinaryDS.show()\n// +--------------------+\n// | value|\n// +--------------------+\n// |[0A 04 4A 6F 68 6...|\n// |[0A 04 4D 69 6B 6...|\n// |[0A 04 42 61 72 7...|\n// +--------------------+\n//\n")),Object(s.b)("p",null,"To turn this dataset into a ",Object(s.b)("inlineCode",{parentName:"p"},"Dataset[Person]"),", we map it through ",Object(s.b)("inlineCode",{parentName:"p"},"parseFrom"),":"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val protosDS: Dataset[Person] = binaryDS.map(Person.parseFrom(_))\n// protosDS: Dataset[Person] = [name: string, age: int ... 1 more field]\n")),Object(s.b)("p",null,"to turn a dataset of protos into ",Object(s.b)("inlineCode",{parentName:"p"},"Dataset[Array[Byte]]"),":"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val protosBinary: Dataset[Array[Byte]] = protosDS.map(_.toByteArray)\n// protosBinary: Dataset[Array[Byte]] = [value: binary]\n")),Object(s.b)("h2",{id:"on-enums"},"On enums"),Object(s.b)("p",null,"In SparkSQL-ScalaPB, enums are represented as strings. 
Unrecognized enum values are represented as strings containing the numeric value."),Object(s.b)("h2",{id:"dataframes-and-datasets-from-rdds"},"Dataframes and Datasets from RDDs"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"import org.apache.spark.rdd.RDD\n\nval protoRDD: RDD[Person] = spark.sparkContext.parallelize(testData)\n\nval protoDF: DataFrame = ProtoSQL.protoToDataFrame(spark, protoRDD)\n\nval protoDS: Dataset[Person] = spark.createDataset(protoRDD)\n")),Object(s.b)("h2",{id:"udfs"},"UDFs"),Object(s.b)("p",null,"If you need to write a UDF that returns a message, it would not pick up our encoder and you may get a runtime failure. To work around this, sparksql-scalapb provides ",Object(s.b)("inlineCode",{parentName:"p"},"ProtoSQL.udf")," to create UDFs. For example, if you need to parse a binary column into a proto:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'val binaryDF = protosBinary.toDF("value")\n// binaryDF: DataFrame = [value: binary]\n\nval parsePersons = ProtoSQL.udf { bytes: Array[Byte] => Person.parseFrom(bytes) }\n// parsePersons: org.apache.spark.sql.Column => org.apache.spark.sql.Column = scalapb.spark.Udfs$$Lambda$13431/0x00000001038b5040@327161ad\n\nbinaryDF.withColumn("person", parsePersons($"value"))\n// res7: DataFrame = [value: binary, person: struct]\n')),Object(s.b)("h2",{id:"primitive-wrappers"},"Primitive wrappers"),Object(s.b)("p",null,"In ProtoSQL 0.9.x and 0.10.x, primitive wrappers are represented in Spark as structs\nwitha single field named ",Object(s.b)("inlineCode",{parentName:"p"},"value"),". A better representation in Spark would be a\nnullable field of the primitive type. The better representation will be the\ndefault in 0.11.x. 
To enable this representation today, replace the usages of\n",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.ProtoSQL")," with ",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.ProtoSQL.withPrimitiveWrappers"),".\nInstead of importing ",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.Implicits._"),", import\n",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.ProtoSQL.implicits._")),Object(s.b)("p",null,"See example in ",Object(s.b)("a",{parentName:"p",href:"https://github.com/scalapb/sparksql-scalapb/blob/80f3162b69313d57f95d3dcbfee865809873567a/sparksql-scalapb/src/test/scala/WrappersSpec.scala#L42-L59"},"WrappersSpec"),"."),Object(s.b)("h2",{id:"datasets-and-none-is-not-a-term"},"Datasets and ",Object(s.b)("inlineCode",{parentName:"h2"}," is not a term")),Object(s.b)("p",null,"You will see this error if for some reason Spark's ",Object(s.b)("inlineCode",{parentName:"p"},"Encoder"),"s are being picked up\ninstead of the ones provided by sparksql-scalapb. Please ensure you are not importing ",Object(s.b)("inlineCode",{parentName:"p"},"spark.implicits._"),". 
See instructions above for imports."),Object(s.b)("h2",{id:"example"},"Example"),Object(s.b)("p",null,"Check out a ",Object(s.b)("a",{parentName:"p",href:"https://github.com/thesamet/sparksql-scalapb-test"},"complete example")," here."))}b.isMDXComponent=!0}}]); \ No newline at end of file +(window.webpackJsonp=window.webpackJsonp||[]).push([[6],{119:function(e,a,n){"use strict";n.d(a,"a",(function(){return b})),n.d(a,"b",(function(){return u}));var t=n(0),r=n.n(t);function s(e,a,n){return a in e?Object.defineProperty(e,a,{value:n,enumerable:!0,configurable:!0,writable:!0}):e[a]=n,e}function o(e,a){var n=Object.keys(e);if(Object.getOwnPropertySymbols){var t=Object.getOwnPropertySymbols(e);a&&(t=t.filter((function(a){return Object.getOwnPropertyDescriptor(e,a).enumerable}))),n.push.apply(n,t)}return n}function l(e){for(var a=1;a=0||(r[n]=e[n]);return r}(e,a);if(Object.getOwnPropertySymbols){var s=Object.getOwnPropertySymbols(e);for(t=0;t=0||Object.prototype.propertyIsEnumerable.call(e,n)&&(r[n]=e[n])}return r}var i=r.a.createContext({}),c=function(e){var a=r.a.useContext(i),n=a;return e&&(n="function"==typeof e?e(a):l(l({},a),e)),n},b=function(e){var a=c(e.components);return r.a.createElement(i.Provider,{value:a},e.children)},d={inlineCode:"code",wrapper:function(e){var a=e.children;return r.a.createElement(r.a.Fragment,{},a)}},m=r.a.forwardRef((function(e,a){var n=e.components,t=e.mdxType,s=e.originalType,o=e.parentName,i=p(e,["components","mdxType","originalType","parentName"]),b=c(n),m=t,u=b["".concat(o,".").concat(m)]||b[m]||d[m]||s;return n?r.a.createElement(u,l(l({ref:a},i),{},{components:n})):r.a.createElement(u,l({ref:a},i))}));function u(e,a){var n=arguments,t=a&&a.mdxType;if("string"==typeof e||t){var s=n.length,o=new Array(s);o[0]=m;var l={};for(var p in a)hasOwnProperty.call(a,p)&&(l[p]=a[p]);l.originalType=e,l.mdxType="string"==typeof e?e:t,o[1]=l;for(var i=2;i<none> is not a 
term",id:"datasets-and-none-is-not-a-term",children:[]},{value:"Example",id:"example",children:[]}],c={toc:i};function b(e){var a=e.components,n=Object(r.a)(e,o);return Object(s.b)("wrapper",Object(t.a)({},c,n,{components:a,mdxType:"MDXLayout"}),Object(s.b)("h2",{id:"introduction"},"Introduction"),Object(s.b)("p",null,"By default, Spark uses reflection to derive schemas and encoders from case\nclasses. This doesn't work well when there are messages that contain types that\nSpark does not understand such as enums, ",Object(s.b)("inlineCode",{parentName:"p"},"ByteString"),"s and ",Object(s.b)("inlineCode",{parentName:"p"},"oneof"),"s. To get around this, sparksql-scalapb provides its own ",Object(s.b)("inlineCode",{parentName:"p"},"Encoder"),"s for protocol buffers."),Object(s.b)("p",null,"However, it turns out there is another obstacle. Spark does not provide any mechanism to compose user-provided encoders with its own reflection-derived Encoders. Therefore, merely providing an ",Object(s.b)("inlineCode",{parentName:"p"},"Encoder")," for protocol buffers is insufficient to derive an encoder for regular case-classes that contain a protobuf as a field. To solve this problem, ScalaPB uses ",Object(s.b)("a",{parentName:"p",href:"https://github.com/typelevel/frameless"},"frameless")," which relies on implicit search to derive encoders. This approach enables combining ScalaPB's encoders with frameless encoders that takes care for all non-protobuf types."),Object(s.b)("h2",{id:"setting-up-your-project"},"Setting up your project"),Object(s.b)("p",null,"We are going to use sbt-assembly to deploy a fat JAR containing ScalaPB, and\nyour compiled protos. 
Make sure in project/plugins.sbt you have a line\nthat adds sbt-assembly:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'addSbtPlugin("com.eed3si9n" % "sbt-assembly" % "0.14.10")\n')),Object(s.b)("p",null,"To add sparksql-scalapb to your project, add ",Object(s.b)("em",{parentName:"p"},"one")," of the following lines that\nmatches ",Object(s.b)("em",{parentName:"p"},"both the version of ScalaPB and Spark")," you use:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'// Spark 3.5 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql35-scalapb0_11" % "1.0.4"\n\n// Spark 3.4 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql34-scalapb0_11" % "1.0.4"\n\n// Spark 3.3 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql33-scalapb0_11" % "1.0.4"\n\n// Spark 3.2 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql32-scalapb0_11" % "1.0.4"\n\n// Spark 3.1 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql31-scalapb0_11" % "1.0.4"\n\n// Spark 3.0 and ScalaPB 0.11\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql30-scalapb0_11" % "1.0.1"\n\n// Spark 3.3 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql33-scalapb0_10" % "1.0.4"\n\n// Spark 3.2 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql32-scalapb0_10" % "1.0.4"\n\n// Spark 3.1 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql31-scalapb0_10" % "1.0.4"\n\n// Spark 3.0 and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql30-scalapb0_10" % "1.0.1"\n\n// Spark 2.x and ScalaPB 0.10\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql-scalapb" % "0.10.4"\n\n// Spark 2.x and ScalaPB 0.9\nlibraryDependencies += "com.thesamet.scalapb" %% "sparksql-scalapb" % "0.9.3"\n')),Object(s.b)("p",null,"Known issue: 
Spark 3.2.1 is binary incompatible with Spark 3.2.0 in some of its internal\nAPIs being used. If you use Spark 3.2.0, please stick to sparksql-scalapb 1.0.0-M1."),Object(s.b)("p",null,"Spark ships with an old version of Google's Protocol Buffers runtime that is not compatible with\nthe current version. In addition, it comes with incompatible versions of scala-collection-compat\nand shapeless. Therefore, we need to shade these libraries. Add the following to your build.sbt:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'assemblyShadeRules in assembly := Seq(\n ShadeRule.rename("com.google.protobuf.**" -> "shadeproto.@1").inAll,\n ShadeRule.rename("scala.collection.compat.**" -> "shadecompat.@1").inAll,\n ShadeRule.rename("shapeless.**" -> "shadeshapeless.@1").inAll\n)\n')),Object(s.b)("p",null,"See ",Object(s.b)("a",{parentName:"p",href:"https://github.com/thesamet/sparksql-scalapb-test/blob/master/build.sbt"},"complete example of build.sbt"),"."),Object(s.b)("h2",{id:"using-sparksql-scalapb"},"Using sparksql-scalapb"),Object(s.b)("p",null,"We assume you have a ",Object(s.b)("inlineCode",{parentName:"p"},"SparkSession")," assigned to the variable ",Object(s.b)("inlineCode",{parentName:"p"},"spark"),". In a standalone Scala program, this can be created with:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'import org.apache.spark.sql.SparkSession\n\nval spark: SparkSession = SparkSession\n .builder()\n .appName("ScalaPB Demo")\n .master("local[2]")\n .getOrCreate()\n// spark: SparkSession = org.apache.spark.sql.SparkSession@b90929a\n')),Object(s.b)("p",null,Object(s.b)("em",{parentName:"p"},"IMPORTANT"),": Ensure you do not import ",Object(s.b)("inlineCode",{parentName:"p"},"spark.implicits._")," to avoid ambiguity between ScalaPB provided encoders and Spark's default encoders. 
You may want to import ",Object(s.b)("inlineCode",{parentName:"p"},"StringToColumn")," to convert ",Object(s.b)("inlineCode",{parentName:"p"},'$"col name"')," into a ",Object(s.b)("inlineCode",{parentName:"p"},"Column"),". Add an import ",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.Implicits")," to add ScalaPB's encoders for protocol buffers into the implicit search scope:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"import org.apache.spark.sql.{Dataset, DataFrame, functions => F}\nimport spark.implicits.StringToColumn\nimport scalapb.spark.ProtoSQL\n\nimport scalapb.spark.Implicits._\n")),Object(s.b)("p",null,"The code snippets below use the ",Object(s.b)("a",{parentName:"p",href:"https://github.com/scalapb/ScalaPB/blob/master/docs/src/main/protobuf/person.proto"},Object(s.b)("inlineCode",{parentName:"a"},"Person")," message"),"."),Object(s.b)("p",null,"We start by creating some test data:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'import scalapb.docs.person.Person\nimport scalapb.docs.person.Person.{Address, AddressType}\n\nval testData = Seq(\n Person(name="John", age=32, addresses=Vector(\n Address(addressType=AddressType.HOME, street="Market", city="SF"))\n ),\n Person(name="Mike", age=29, addresses=Vector(\n Address(addressType=AddressType.WORK, street="Castro", city="MV"),\n Address(addressType=AddressType.HOME, street="Church", city="MV"))\n ),\n Person(name="Bart", age=27)\n)\n')),Object(s.b)("p",null,"We can create a ",Object(s.b)("inlineCode",{parentName:"p"},"DataFrame")," from the test data:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val df = ProtoSQL.createDataFrame(spark, testData)\n// df: DataFrame = [name: string, age: int ... 
1 more field]\ndf.printSchema()\n// root\n// |-- name: string (nullable = true)\n// |-- age: integer (nullable = true)\n// |-- addresses: array (nullable = false)\n// | |-- element: struct (containsNull = false)\n// | | |-- address_type: string (nullable = true)\n// | | |-- street: string (nullable = true)\n// | | |-- city: string (nullable = true)\n// \ndf.show()\n// +----+---+--------------------+\n// |name|age| addresses|\n// +----+---+--------------------+\n// |John| 32|[{HOME, Market, SF}]|\n// |Mike| 29|[{WORK, Castro, M...|\n// |Bart| 27| []|\n// +----+---+--------------------+\n//\n")),Object(s.b)("p",null,"and then process it as any other Dataframe in Spark:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'df.select($"name", F.size($"addresses").alias("address_count")).show()\n// +----+-------------+\n// |name|address_count|\n// +----+-------------+\n// |John| 1|\n// |Mike| 2|\n// |Bart| 0|\n// +----+-------------+\n// \n\nval nameAndAddress = df.select($"name", $"addresses".getItem(0).alias("firstAddress"))\n// nameAndAddress: DataFrame = [name: string, firstAddress: struct]\n\nnameAndAddress.show()\n// +----+------------------+\n// |name| firstAddress|\n// +----+------------------+\n// |John|{HOME, Market, SF}|\n// |Mike|{WORK, Castro, MV}|\n// |Bart| null|\n// +----+------------------+\n//\n')),Object(s.b)("p",null,"Using the datasets API it is possible to bring the data back to ScalaPB case classes:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"nameAndAddress.as[(String, Option[Address])].collect().foreach(println)\n// (John,Some(Address(HOME,Market,SF,UnknownFieldSet(Map()))))\n// (Mike,Some(Address(WORK,Castro,MV,UnknownFieldSet(Map()))))\n// (Bart,None)\n")),Object(s.b)("p",null,"You can create a Dataset directly using Spark APIs:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"spark.createDataset(testData)\n// res5: 
Dataset[Person] = [name: string, age: int ... 1 more field]\n")),Object(s.b)("h2",{id:"from-binary-to-protos-and-back"},"From Binary to protos and back"),Object(s.b)("p",null,"In some situations, you may need to deal with datasets that contain serialized protocol buffers. This can be handled by mapping the datasets through ScalaPB's ",Object(s.b)("inlineCode",{parentName:"p"},"parseFrom")," and ",Object(s.b)("inlineCode",{parentName:"p"},"toByteArray")," functions."),Object(s.b)("p",null,"Let's start by preparing a dataset with test binary data by mapping our ",Object(s.b)("inlineCode",{parentName:"p"},"testData"),":"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val binaryDS: Dataset[Array[Byte]] = spark.createDataset(testData.map(_.toByteArray))\n// binaryDS: Dataset[Array[Byte]] = [value: binary]\n\nbinaryDS.show()\n// +--------------------+\n// | value|\n// +--------------------+\n// |[0A 04 4A 6F 68 6...|\n// |[0A 04 4D 69 6B 6...|\n// |[0A 04 42 61 72 7...|\n// +--------------------+\n//\n")),Object(s.b)("p",null,"To turn this dataset into a ",Object(s.b)("inlineCode",{parentName:"p"},"Dataset[Person]"),", we map it through ",Object(s.b)("inlineCode",{parentName:"p"},"parseFrom"),":"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val protosDS: Dataset[Person] = binaryDS.map(Person.parseFrom(_))\n// protosDS: Dataset[Person] = [name: string, age: int ... 1 more field]\n")),Object(s.b)("p",null,"to turn a dataset of protos into ",Object(s.b)("inlineCode",{parentName:"p"},"Dataset[Array[Byte]]"),":"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"val protosBinary: Dataset[Array[Byte]] = protosDS.map(_.toByteArray)\n// protosBinary: Dataset[Array[Byte]] = [value: binary]\n")),Object(s.b)("h2",{id:"on-enums"},"On enums"),Object(s.b)("p",null,"In SparkSQL-ScalaPB, enums are represented as strings. 
Unrecognized enum values are represented as strings containing the numeric value."),Object(s.b)("h2",{id:"dataframes-and-datasets-from-rdds"},"Dataframes and Datasets from RDDs"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},"import org.apache.spark.rdd.RDD\n\nval protoRDD: RDD[Person] = spark.sparkContext.parallelize(testData)\n\nval protoDF: DataFrame = ProtoSQL.protoToDataFrame(spark, protoRDD)\n\nval protoDS: Dataset[Person] = spark.createDataset(protoRDD)\n")),Object(s.b)("h2",{id:"udfs"},"UDFs"),Object(s.b)("p",null,"If you need to write a UDF that returns a message, it would not pick up our encoder and you may get a runtime failure. To work around this, sparksql-scalapb provides ",Object(s.b)("inlineCode",{parentName:"p"},"ProtoSQL.udf")," to create UDFs. For example, if you need to parse a binary column into a proto:"),Object(s.b)("pre",null,Object(s.b)("code",{parentName:"pre",className:"language-scala"},'val binaryDF = protosBinary.toDF("value")\n// binaryDF: DataFrame = [value: binary]\n\nval parsePersons = ProtoSQL.udf { bytes: Array[Byte] => Person.parseFrom(bytes) }\n// parsePersons: org.apache.spark.sql.Column => org.apache.spark.sql.Column = scalapb.spark.Udfs$$Lambda$13418/0x00000001038b5840@3694829f\n\nbinaryDF.withColumn("person", parsePersons($"value"))\n// res7: DataFrame = [value: binary, person: struct]\n')),Object(s.b)("h2",{id:"primitive-wrappers"},"Primitive wrappers"),Object(s.b)("p",null,"In ProtoSQL 0.9.x and 0.10.x, primitive wrappers are represented in Spark as structs\nwitha single field named ",Object(s.b)("inlineCode",{parentName:"p"},"value"),". A better representation in Spark would be a\nnullable field of the primitive type. The better representation will be the\ndefault in 0.11.x. 
To enable this representation today, replace the usages of\n",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.ProtoSQL")," with ",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.ProtoSQL.withPrimitiveWrappers"),".\nInstead of importing ",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.Implicits._"),", import\n",Object(s.b)("inlineCode",{parentName:"p"},"scalapb.spark.ProtoSQL.implicits._")),Object(s.b)("p",null,"See example in ",Object(s.b)("a",{parentName:"p",href:"https://github.com/scalapb/sparksql-scalapb/blob/80f3162b69313d57f95d3dcbfee865809873567a/sparksql-scalapb/src/test/scala/WrappersSpec.scala#L42-L59"},"WrappersSpec"),"."),Object(s.b)("h2",{id:"datasets-and-none-is-not-a-term"},"Datasets and ",Object(s.b)("inlineCode",{parentName:"h2"}," is not a term")),Object(s.b)("p",null,"You will see this error if for some reason Spark's ",Object(s.b)("inlineCode",{parentName:"p"},"Encoder"),"s are being picked up\ninstead of the ones provided by sparksql-scalapb. Please ensure you are not importing ",Object(s.b)("inlineCode",{parentName:"p"},"spark.implicits._"),". 
See instructions above for imports."),Object(s.b)("h2",{id:"example"},"Example"),Object(s.b)("p",null,"Check out a ",Object(s.b)("a",{parentName:"p",href:"https://github.com/thesamet/sparksql-scalapb-test"},"complete example")," here."))}b.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/28fe488f.5c74e0e4.js b/28fe488f.54f3afb9.js similarity index 91% rename from 28fe488f.5c74e0e4.js rename to 28fe488f.54f3afb9.js index b252f6f75..1b016cf50 100644 --- a/28fe488f.5c74e0e4.js +++ b/28fe488f.54f3afb9.js @@ -1 +1 @@ -(window.webpackJsonp=window.webpackJsonp||[]).push([[14],{119:function(e,n,t){"use strict";t.d(n,"a",(function(){return b})),t.d(n,"b",(function(){return u}));var a=t(0),r=t.n(a);function o(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function s(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function i(e){for(var n=1;n=0||(r[t]=e[t]);return r}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(r[t]=e[t])}return r}var c=r.a.createContext({}),p=function(e){var n=r.a.useContext(c),t=n;return e&&(t="function"==typeof e?e(n):i(i({},n),e)),t},b=function(e){var n=p(e.components);return r.a.createElement(c.Provider,{value:n},e.children)},d={inlineCode:"code",wrapper:function(e){var n=e.children;return r.a.createElement(r.a.Fragment,{},n)}},m=r.a.forwardRef((function(e,n){var t=e.components,a=e.mdxType,o=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),b=p(t),m=a,u=b["".concat(s,".").concat(m)]||b[m]||d[m]||o;return t?r.a.createElement(u,i(i({ref:n},c),{},{components:t})):r.a.createElement(u,i({ref:n},c))}));function u(e,n){var t=arguments,a=n&&n.mdxType;if("string"==typeof e||a){var o=t.length,s=new 
Array(o);s[0]=m;var i={};for(var l in n)hasOwnProperty.call(n,l)&&(i[l]=n[l]);i.originalType=e,i.mdxType="string"==typeof e?e:a,s[1]=i;for(var c=2;c,\n// UnknownFieldSet(Map())\n// )\n// ),\n// UnknownFieldSet(Map())\n// )\n\nval typeRegistry = TypeRegistry().addMessage[MyMessage]\n// typeRegistry: TypeRegistry = TypeRegistry(\n// Map(\n// "type.googleapis.com/com.thesamet.docs.MyMessage" -> com.thesamet.docs.json.MyMessage$@ab70b75\n// ),\n// Set()\n// )\n\nval printer = new Printer().withTypeRegistry(typeRegistry)\n// printer: Printer = scalapb.json4s.Printer@5af90342\n\nprinter.print(c)\n// res0: String = "{\\"myAny\\":{\\"@type\\":\\"type.googleapis.com/com.thesamet.docs.MyMessage\\",\\"x\\":17}}"\n')),Object(o.b)("p",null,"Conversely, you can start from a JSON and parse it back to a ",Object(o.b)("inlineCode",{parentName:"p"},"MyContainer")," that contains an ",Object(o.b)("inlineCode",{parentName:"p"},"Any")," field:"),Object(o.b)("pre",null,Object(o.b)("code",{parentName:"pre",className:"language-scala"},'val parser = new Parser().withTypeRegistry(typeRegistry)\n// parser: Parser = scalapb.json4s.Parser@3bef8f15\n\nparser.fromJsonString[MyContainer]("""\n {\n "myAny": {\n "@type": "type.googleapis.com/com.thesamet.docs.MyMessage",\n "x": 17\n }\n }""")\n// res1: MyContainer = MyContainer(\n// Some(\n// Any(\n// "type.googleapis.com/com.thesamet.docs.MyMessage",\n// ,\n// UnknownFieldSet(Map())\n// )\n// ),\n// UnknownFieldSet(Map())\n// )\n')))}b.isMDXComponent=!0}}]); \ No newline at end of file +(window.webpackJsonp=window.webpackJsonp||[]).push([[14],{119:function(e,n,t){"use strict";t.d(n,"a",(function(){return b})),t.d(n,"b",(function(){return u}));var a=t(0),r=t.n(a);function o(e,n,t){return n in e?Object.defineProperty(e,n,{value:t,enumerable:!0,configurable:!0,writable:!0}):e[n]=t,e}function s(e,n){var t=Object.keys(e);if(Object.getOwnPropertySymbols){var a=Object.getOwnPropertySymbols(e);n&&(a=a.filter((function(n){return 
Object.getOwnPropertyDescriptor(e,n).enumerable}))),t.push.apply(t,a)}return t}function i(e){for(var n=1;n=0||(r[t]=e[t]);return r}(e,n);if(Object.getOwnPropertySymbols){var o=Object.getOwnPropertySymbols(e);for(a=0;a=0||Object.prototype.propertyIsEnumerable.call(e,t)&&(r[t]=e[t])}return r}var c=r.a.createContext({}),p=function(e){var n=r.a.useContext(c),t=n;return e&&(t="function"==typeof e?e(n):i(i({},n),e)),t},b=function(e){var n=p(e.components);return r.a.createElement(c.Provider,{value:n},e.children)},d={inlineCode:"code",wrapper:function(e){var n=e.children;return r.a.createElement(r.a.Fragment,{},n)}},m=r.a.forwardRef((function(e,n){var t=e.components,a=e.mdxType,o=e.originalType,s=e.parentName,c=l(e,["components","mdxType","originalType","parentName"]),b=p(t),m=a,u=b["".concat(s,".").concat(m)]||b[m]||d[m]||o;return t?r.a.createElement(u,i(i({ref:n},c),{},{components:t})):r.a.createElement(u,i({ref:n},c))}));function u(e,n){var t=arguments,a=n&&n.mdxType;if("string"==typeof e||a){var o=t.length,s=new Array(o);s[0]=m;var i={};for(var l in n)hasOwnProperty.call(n,l)&&(i[l]=n[l]);i.originalType=e,i.mdxType="string"==typeof e?e:a,s[1]=i;for(var c=2;c,\n// UnknownFieldSet(Map())\n// )\n// ),\n// UnknownFieldSet(Map())\n// )\n\nval typeRegistry = TypeRegistry().addMessage[MyMessage]\n// typeRegistry: TypeRegistry = TypeRegistry(\n// Map(\n// "type.googleapis.com/com.thesamet.docs.MyMessage" -> com.thesamet.docs.json.MyMessage$@785c9f08\n// ),\n// Set()\n// )\n\nval printer = new Printer().withTypeRegistry(typeRegistry)\n// printer: Printer = scalapb.json4s.Printer@2a09bfaa\n\nprinter.print(c)\n// res0: String = "{\\"myAny\\":{\\"@type\\":\\"type.googleapis.com/com.thesamet.docs.MyMessage\\",\\"x\\":17}}"\n')),Object(o.b)("p",null,"Conversely, you can start from a JSON and parse it back to a ",Object(o.b)("inlineCode",{parentName:"p"},"MyContainer")," that contains an ",Object(o.b)("inlineCode",{parentName:"p"},"Any")," 
field:"),Object(o.b)("pre",null,Object(o.b)("code",{parentName:"pre",className:"language-scala"},'val parser = new Parser().withTypeRegistry(typeRegistry)\n// parser: Parser = scalapb.json4s.Parser@75710908\n\nparser.fromJsonString[MyContainer]("""\n {\n "myAny": {\n "@type": "type.googleapis.com/com.thesamet.docs.MyMessage",\n "x": 17\n }\n }""")\n// res1: MyContainer = MyContainer(\n// Some(\n// Any(\n// "type.googleapis.com/com.thesamet.docs.MyMessage",\n// ,\n// UnknownFieldSet(Map())\n// )\n// ),\n// UnknownFieldSet(Map())\n// )\n')))}b.isMDXComponent=!0}}]); \ No newline at end of file diff --git a/404.html b/404.html index 0671ae4b1..a303f3f34 100644 --- a/404.html +++ b/404.html @@ -10,14 +10,14 @@ Page Not Found | ScalaPB - +

Page Not Found

We could not find what you were looking for.

Please contact the owner of the site that linked you to the original URL and let them know their link is broken.

- + \ No newline at end of file diff --git a/blog/2019/05/28/hola/index.html b/blog/2019/05/28/hola/index.html index 05fd07e5c..5282a12df 100644 --- a/blog/2019/05/28/hola/index.html +++ b/blog/2019/05/28/hola/index.html @@ -10,7 +10,7 @@ Hola | ScalaPB - + @@ -23,7 +23,7 @@

Hola

Gao Wei

Gao Wei

Docusaurus Core Team

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet

- + diff --git a/blog/2019/05/29/hello-world/index.html b/blog/2019/05/29/hello-world/index.html index 81be4a520..7369e9a8a 100644 --- a/blog/2019/05/29/hello-world/index.html +++ b/blog/2019/05/29/hello-world/index.html @@ -10,7 +10,7 @@ Hello | ScalaPB - + @@ -23,7 +23,7 @@
- + diff --git a/blog/2019/05/30/welcome/index.html b/blog/2019/05/30/welcome/index.html index 191348fc8..60f9eff3e 100644 --- a/blog/2019/05/30/welcome/index.html +++ b/blog/2019/05/30/welcome/index.html @@ -10,7 +10,7 @@ Welcome | ScalaPB - + @@ -23,7 +23,7 @@

Welcome

Yangshun Tay

Yangshun Tay

Front End Engineer @ Facebook

Blog features are powered by the blog plugin. Simply add files to the blog directory. It supports tags as well!

Delete the whole directory if you don't want the blog features. As simple as that!

- + diff --git a/blog/index.html b/blog/index.html index 9e5b21963..7b7f88038 100644 --- a/blog/index.html +++ b/blog/index.html @@ -10,7 +10,7 @@ Blog | ScalaPB - + @@ -26,7 +26,7 @@

Welcome

Yangshun Tay

Yangshun Tay

Front End Engineer @ Facebook

Blog features are powered by the blog plugin. Simply add files to the blog directory. It supports tags as well!

Delete the whole directory if you don't want the blog features. As simple as that!

Hola

Gao Wei

Gao Wei

Docusaurus Core Team

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet

- + diff --git a/blog/tags/docusaurus/index.html b/blog/tags/docusaurus/index.html index 2d909b1f8..1d3b4310c 100644 --- a/blog/tags/docusaurus/index.html +++ b/blog/tags/docusaurus/index.html @@ -10,7 +10,7 @@ Posts tagged "docusaurus" | ScalaPB - + @@ -26,7 +26,7 @@

3 posts tagged with "docusaurus"

View All Tags

Welcome

Yangshun Tay

Yangshun Tay

Front End Engineer @ Facebook

Blog features are powered by the blog plugin. Simply add files to the blog directory. It supports tags as well!

Delete the whole directory if you don't want the blog features. As simple as that!

Hola

Gao Wei

Gao Wei

Docusaurus Core Team

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet

- + diff --git a/blog/tags/facebook/index.html b/blog/tags/facebook/index.html index 3ed23e3ac..0ad5b37bb 100644 --- a/blog/tags/facebook/index.html +++ b/blog/tags/facebook/index.html @@ -10,7 +10,7 @@ Posts tagged "facebook" | ScalaPB - + @@ -24,7 +24,7 @@

1 post tagged with "facebook"

View All Tags

Welcome

Yangshun Tay

Yangshun Tay

Front End Engineer @ Facebook

Blog features are powered by the blog plugin. Simply add files to the blog directory. It supports tags as well!

Delete the whole directory if you don't want the blog features. As simple as that!

- + diff --git a/blog/tags/hello/index.html b/blog/tags/hello/index.html index e1bda4748..a3d7abacf 100644 --- a/blog/tags/hello/index.html +++ b/blog/tags/hello/index.html @@ -10,7 +10,7 @@ Posts tagged "hello" | ScalaPB - + @@ -25,7 +25,7 @@

2 posts tagged with "hello"

View All Tags

Welcome

Yangshun Tay

Yangshun Tay

Front End Engineer @ Facebook

Blog features are powered by the blog plugin. Simply add files to the blog directory. It supports tags as well!

Delete the whole directory if you don't want the blog features. As simple as that!

- + diff --git a/blog/tags/hola/index.html b/blog/tags/hola/index.html index 6156a2bfb..e5108f38f 100644 --- a/blog/tags/hola/index.html +++ b/blog/tags/hola/index.html @@ -10,7 +10,7 @@ Posts tagged "hola" | ScalaPB - + @@ -24,7 +24,7 @@

1 post tagged with "hola"

View All Tags

Hola

Gao Wei

Gao Wei

Docusaurus Core Team

Lorem ipsum dolor sit amet, consectetur adipiscing elit. Pellentesque elementum dignissim ultricies. Fusce rhoncus ipsum tempor eros aliquam consequat. Lorem ipsum dolor sit amet

- + diff --git a/blog/tags/index.html b/blog/tags/index.html index 7dfd5be5a..0da41883c 100644 --- a/blog/tags/index.html +++ b/blog/tags/index.html @@ -10,7 +10,7 @@ Tags | ScalaPB - + @@ -22,7 +22,7 @@ - + diff --git a/docs/common-protos/index.html b/docs/common-protos/index.html index f9392617b..00966645c 100644 --- a/docs/common-protos/index.html +++ b/docs/common-protos/index.html @@ -10,7 +10,7 @@ Common protos | ScalaPB - + @@ -29,7 +29,7 @@ the classpath. This is accomplished by adding the library as a normal dependency.

If you don't have any proto files that import the common protos, then you can omit the "protobuf" dependency.

Adding new packages#

If you don't see your favorite third-party proto package here, and there is already a maven package for it that provides the proto files (with possibly Java generated classes), you can send a pull request to common-protos to have it added. See instruction on the ScalaPB Common Protos project page on Github.

Available packages#

proto-google-common-protos#

ScalaPB 0.11.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "proto-google-common-protos-scalapb_0.11" % "2.9.6-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "proto-google-common-protos-scalapb_0.11" % "2.9.6-0"
)

ScalaPB 0.10.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "proto-google-common-protos-scalapb_0.10" % "2.9.6-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "proto-google-common-protos-scalapb_0.10" % "2.9.6-0"
)

ScalaPB 0.9.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "proto-google-common-protos-scalapb_0.9" % "2.9.6-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "proto-google-common-protos-scalapb_0.9" % "2.9.6-0"
)

proto-google-cloud-pubsub-v1#

ScalaPB 0.11.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "proto-google-cloud-pubsub-v1-scalapb_0.11" % "1.102.20-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "proto-google-cloud-pubsub-v1-scalapb_0.11" % "1.102.20-0"
)

ScalaPB 0.10.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "proto-google-cloud-pubsub-v1-scalapb_0.10" % "1.102.20-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "proto-google-cloud-pubsub-v1-scalapb_0.10" % "1.102.20-0"
)

ScalaPB 0.9.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "proto-google-cloud-pubsub-v1-scalapb_0.9" % "1.102.20-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "proto-google-cloud-pubsub-v1-scalapb_0.9" % "1.102.20-0"
)

pgv-proto#

ScalaPB 0.11.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "pgv-proto-scalapb_0.11" % "0.6.13-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "pgv-proto-scalapb_0.11" % "0.6.13-0"
)

ScalaPB 0.10.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "pgv-proto-scalapb_0.10" % "0.6.13-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "pgv-proto-scalapb_0.10" % "0.6.13-0"
)

ScalaPB 0.9.x:

libraryDependencies ++= Seq(
"com.thesamet.scalapb.common-protos" %% "pgv-proto-scalapb_0.9" % "0.6.13-0" % "protobuf",
"com.thesamet.scalapb.common-protos" %% "pgv-proto-scalapb_0.9" % "0.6.13-0"
)

footer

- + diff --git a/docs/contact/index.html b/docs/contact/index.html index 7835c70fe..6139ab327 100644 --- a/docs/contact/index.html +++ b/docs/contact/index.html @@ -10,7 +10,7 @@ Contacting us | ScalaPB - + @@ -46,7 +46,7 @@ would be a great way to support the time and effort put into the development of ScalaPB!

- + diff --git a/docs/customizations/index.html b/docs/customizations/index.html index d2fb33a64..13d867946 100644 --- a/docs/customizations/index.html +++ b/docs/customizations/index.html @@ -10,7 +10,7 @@ Customizations | ScalaPB - + @@ -227,7 +227,7 @@ sealed oneofs:

message Foo {
option (scalapb.message).derives = "yourpkg.Show";
...
}}}
message Expr {
option (scalapb.message).sealed_oneof_derives = "yourpkg.Show";
oneof sealed_value {
...
}
}
- + diff --git a/docs/dotty/index.html b/docs/dotty/index.html index d1ea74cc1..b02aafe42 100644 --- a/docs/dotty/index.html +++ b/docs/dotty/index.html @@ -10,7 +10,7 @@ Using with Dotty | ScalaPB - + @@ -29,7 +29,7 @@ the Scala compiler with the default compiler settings. It is known that currently the generator will provide an error if -language:strictEquality is set.

- + diff --git a/docs/faq/index.html b/docs/faq/index.html index 3049c6455..eea3fbe11 100644 --- a/docs/faq/index.html +++ b/docs/faq/index.html @@ -10,7 +10,7 @@ Frequently Asked Questions | ScalaPB - + @@ -76,7 +76,7 @@
Use a recent version of sbt-protoc (at least 1.0.6), which defaults to a
compatible version of protoc (3.19.2).
- + diff --git a/docs/generated-code/index.html b/docs/generated-code/index.html index b0653829e..d3b74a9cd 100644 --- a/docs/generated-code/index.html +++ b/docs/generated-code/index.html @@ -10,7 +10,7 @@ Generated Code | ScalaPB - + @@ -119,7 +119,7 @@ toJavaProto methods.
  • The companion object for enums will have fromJavaValue and toJavaValue methods.
  • - + diff --git a/docs/generic/index.html b/docs/generic/index.html index b3ae0283c..698b86158 100644 --- a/docs/generic/index.html +++ b/docs/generic/index.html @@ -10,7 +10,7 @@ Writing generic code | ScalaPB - + @@ -46,7 +46,7 @@ to return, and the filename. The Scala compiler will automatically find the appropriate message companion to pass as cmp via implicit search:

    readFromFile[Person]("/tmp/person.pb")
    - + diff --git a/docs/getting-started/index.html b/docs/getting-started/index.html index d7c1f5982..4138ca48f 100644 --- a/docs/getting-started/index.html +++ b/docs/getting-started/index.html @@ -10,7 +10,7 @@ Protocol Buffer Tutorial: Scala | ScalaPB - + @@ -46,7 +46,7 @@
    Person(
    id = id,
    name = name,
    email = if (email.nonEmpty) Some(email) else None,
    phones = phones
    )
    }
    def addPerson(): Unit = {
    val newPerson = personFromStdin()
    val addressBook = readFromFile()
    // Append the new person to the people list field
    val updated = addressBook.update(
    _.people :+= newPerson
    )
    Using(new FileOutputStream("addressbook.pb")) { output =>
    updated.writeTo(output)
    }
    }

    Running the example#

    In sbt, type run

    This document, "Protocol Buffer Tutorial: Scala" is a modification of "Protocol Buffer Basics: Java", which is a work created and shared by Google and used according to terms described in the Creative Commons 4.0 Attribution License.

    - + diff --git a/docs/grpc/index.html b/docs/grpc/index.html index 3c7dbbdd1..7ebf15fd9 100644 --- a/docs/grpc/index.html +++ b/docs/grpc/index.html @@ -10,7 +10,7 @@ gRPC | ScalaPB - + @@ -44,7 +44,7 @@ closely the offical grpc-java API. Example project coming soon.

    grpc-netty issues#

    In certain situations (for example when you have a fat jar), you may see the following exception:

    Exception in thread "main" io.grpc.ManagedChannelProvider$ProviderNotFoundException: No functional server found. Try adding a dependency on the grpc-netty artifact

    To work around this issue, try the following solutions:

    1. Create a NettyServer explicitly using io.grpc.netty.NettyServerBuilder.

    Example:

    NettyServerBuilder
    .forPort(9000)
    .keepAliveTime(500, TimeUnit.SECONDS)
    1. If using SBT, try the following merge conflict strategy:
    assemblyMergeStrategy in assembly := {
    case x if x.contains("io.netty.versions.properties") => MergeStrategy.discard
    case x =>
    val oldStrategy = (assemblyMergeStrategy in assembly).value
    oldStrategy(x)
    }
    - + diff --git a/docs/index.html b/docs/index.html index 599e5e3d0..574f150bb 100644 --- a/docs/index.html +++ b/docs/index.html @@ -10,7 +10,7 @@ ScalaPB: Scala Protocol Buffer Compiler | ScalaPB - + @@ -33,7 +33,7 @@ Oneof's that were introduced in Protocol Buffers 2.6.0.

  • Newer: Supports Scala.js (in 0.5.x).

  • Newer: Supports gRPC (in 0.5.x).

  • Newest: Supports SparkSQL (in 0.5.23).

  • Newest: Supports converting to and from JSON (in 0.5.x).

  • Newest: Supports User-defined options (in 0.5.29).

  • - + diff --git a/docs/installation/index.html b/docs/installation/index.html index 1bdcb634c..890a68a9f 100644 --- a/docs/installation/index.html +++ b/docs/installation/index.html @@ -10,7 +10,7 @@ Installing ScalaPB | ScalaPB - + @@ -31,7 +31,7 @@ use scalapbc (ScalaPB compiler).

    See ScalaPBC.

    Running from Maven#

    Using ScalaPBC, you can get maven to generate the code for you. Check out the ScalaPB Maven example.

    Next:#

    Read about the Generated Code.

    - + diff --git a/docs/json/index.html b/docs/json/index.html index 6deefed39..934dce1dc 100644 --- a/docs/json/index.html +++ b/docs/json/index.html @@ -10,7 +10,7 @@ ScalaPB and JSON | ScalaPB - + @@ -18,7 +18,7 @@ - +
    @@ -45,13 +45,13 @@ strings. To use the numeric representation, set this option to true. Note that due to the way Javascript represents numbers, there is a possibility to lose precision (more details here).

    The parser can be instantiated with new scalapb.json4s.Parser(), and various methods can return instances of the parser with customized configuration:

    • ignoringUnkownFields: by default the parser will throw a JsonFormatException when encountering unknown fields. By enabling this option, unknown options will be silently ignored.
    • ignoringOverlappingOneofFields: by default the parser will throw a JsonFormatException if values are provided for more than one field within the same oneof. By enabling this option, when more than one field is present for a oneof, one of the values of this field will be picked for the oneof.
    • mapEntriesAsKeyValuePairs: by default, protobuf maps are modeled as json objects. When this setting is enabled, protobuf maps are expected to be read as arrays of objects with key and value keys.

    See the list of constructor paramerters here

    Printing and parsing Anys#

    In Protocol Buffers, google.protobuf.Any is a type that embeds an arbitrary protobuf message. An Any is represented as a message that contains a typeUrl field that identifies the type, and a bytes field value which contains the serialized contents of a message. In JSON, the message embedded in the Any is serialized as usual, and there is a @type key added to it to identify which message it is. The parser expects this @type key to know which message it is. To accomplish this, all the expected embedded types need to be registered with a TypeRegistry so the printer and parser know how to process the embedded message.

    The following example is based on this proto.

    import com.thesamet.docs.json._
    import scalapb.json4s.{Printer, Parser, TypeRegistry}
    -
    val c = MyContainer(
    myAny=Some(
    com.google.protobuf.any.Any.pack(
    MyMessage(x=17)
    )
    )
    )
    // c: MyContainer = MyContainer(
    // Some(
    // Any(
    // "type.googleapis.com/com.thesamet.docs.MyMessage",
    // <ByteString@5b6aa50d size=2 contents="\b\021">,
    // UnknownFieldSet(Map())
    // )
    // ),
    // UnknownFieldSet(Map())
    // )
    -
    val typeRegistry = TypeRegistry().addMessage[MyMessage]
    // typeRegistry: TypeRegistry = TypeRegistry(
    // Map(
    // "type.googleapis.com/com.thesamet.docs.MyMessage" -> com.thesamet.docs.json.MyMessage$@ab70b75
    // ),
    // Set()
    // )
    -
    val printer = new Printer().withTypeRegistry(typeRegistry)
    // printer: Printer = scalapb.json4s.Printer@5af90342
    -
    printer.print(c)
    // res0: String = "{\"myAny\":{\"@type\":\"type.googleapis.com/com.thesamet.docs.MyMessage\",\"x\":17}}"

    Conversely, you can start from a JSON and parse it back to a MyContainer that contains an Any field:

    val parser = new Parser().withTypeRegistry(typeRegistry)
    // parser: Parser = scalapb.json4s.Parser@3bef8f15
    -
    parser.fromJsonString[MyContainer]("""
    {
    "myAny": {
    "@type": "type.googleapis.com/com.thesamet.docs.MyMessage",
    "x": 17
    }
    }""")
    // res1: MyContainer = MyContainer(
    // Some(
    // Any(
    // "type.googleapis.com/com.thesamet.docs.MyMessage",
    // <ByteString@75e67ca6 size=2 contents="\b\021">,
    // UnknownFieldSet(Map())
    // )
    // ),
    // UnknownFieldSet(Map())
    // )
    +
    val c = MyContainer(
    myAny=Some(
    com.google.protobuf.any.Any.pack(
    MyMessage(x=17)
    )
    )
    )
    // c: MyContainer = MyContainer(
    // Some(
    // Any(
    // "type.googleapis.com/com.thesamet.docs.MyMessage",
    // <ByteString@762161f9 size=2 contents="\b\021">,
    // UnknownFieldSet(Map())
    // )
    // ),
    // UnknownFieldSet(Map())
    // )
    +
    val typeRegistry = TypeRegistry().addMessage[MyMessage]
    // typeRegistry: TypeRegistry = TypeRegistry(
    // Map(
    // "type.googleapis.com/com.thesamet.docs.MyMessage" -> com.thesamet.docs.json.MyMessage$@785c9f08
    // ),
    // Set()
    // )
    +
    val printer = new Printer().withTypeRegistry(typeRegistry)
    // printer: Printer = scalapb.json4s.Printer@2a09bfaa
    +
    printer.print(c)
    // res0: String = "{\"myAny\":{\"@type\":\"type.googleapis.com/com.thesamet.docs.MyMessage\",\"x\":17}}"

    Conversely, you can start from a JSON and parse it back to a MyContainer that contains an Any field:

    val parser = new Parser().withTypeRegistry(typeRegistry)
    // parser: Parser = scalapb.json4s.Parser@75710908
    +
    parser.fromJsonString[MyContainer]("""
    {
    "myAny": {
    "@type": "type.googleapis.com/com.thesamet.docs.MyMessage",
    "x": 17
    }
    }""")
    // res1: MyContainer = MyContainer(
    // Some(
    // Any(
    // "type.googleapis.com/com.thesamet.docs.MyMessage",
    // <ByteString@ae5f346 size=2 contents="\b\021">,
    // UnknownFieldSet(Map())
    // )
    // ),
    // UnknownFieldSet(Map())
    // )
    - + @@ -59,6 +59,6 @@ - + \ No newline at end of file diff --git a/docs/sbt-settings/index.html b/docs/sbt-settings/index.html index 80b39993c..5bf9d67ed 100644 --- a/docs/sbt-settings/index.html +++ b/docs/sbt-settings/index.html @@ -10,7 +10,7 @@ SBT Settings | ScalaPB - + @@ -30,7 +30,7 @@ however this is configurable using the Compile / PB.protoSources setting.

    By default, sbt-protoc invokes protoc 3.x that is shipped with protoc-jar. If you would like to run a different version of protoc:

    PB.protocVersion := "-v3.11.4"

    See all available options in sbt-protoc documentation

    Java Conversions#

    To enable Java conversions add the following to your build.sbt:

    Compile / PB.targets := Seq(
    PB.gens.java -> (Compile / sourceManaged).value,
    scalapb.gen(javaConversions=true) -> (Compile / sourceManaged).value
    )

    gRPC#

    Generating gRPC stubs for services is enabled by default. To disable:

    Compile / PB.targets := Seq(
    scalapb.gen(grpc=false) -> (Compile / sourceManaged).value
    )

    Additional options to the generator#

    scalapb.gen(
    flatPackage: Boolean = false,
    javaConversions: Boolean = false,
    grpc: Boolean = true,
    singleLineToProtoString: Boolean = false,
    asciiFormatToString: Boolean = false,
    lenses: Boolean = true,
    retainSourceCodeInfo: Boolean = false
    )
    OptionscalapbcDescription
    flatPackageflat_packageWhen set, ScalaPB will not append the protofile base name to the package name.
    javaConversionsjava_conversionsGenerates in the companion object two functions, toJavaProto and fromJavaProto that convert between the Scala case class and the Java protobufs. For the generated code to compile, the Java protobuf code need to be also generated or available as a library dependency.
    grpcgrpcGenerates gRPC code for services. Default is true in scalapb.gen, and need to be explicitly specified in scalapbc.
    singleLineToProtoStringsingle_line_to_proto_stringBy default, ScalaPB generates a toProtoString() method that renders the message as a multi-line format (using TextFormat.printToUnicodeString). If set, ScalaPB generates toString() methods that use the single line format.
    asciiFormatToStringascii_format_to_stringSetting this to true, overrides toString to return a standard ASCII representation of the message by calling toProtoString.
    lensesno_lensesBy default, ScalaPB generates lenses for each message for easy updating. If you are not using this feature and would like to reduce code size or compilation time, you can set this to false and lenses will not be generated.
    retainSourceCodeInforetain_source_code_infoRetain source code information (locations, comments) provided by protoc in the descriptors. Use the location accessor to get that information from a descriptor.
    scala3Sourcesscala3_sourcesIf set, generates sources that are error-free under -source future with Scala 3, or Xsource:3 with Scala 2.13.
    - + diff --git a/docs/scala.js/index.html b/docs/scala.js/index.html index c0b3b3dd0..ca5f431bf 100644 --- a/docs/scala.js/index.html +++ b/docs/scala.js/index.html @@ -10,7 +10,7 @@ Using ScalaPB with Scala.js | ScalaPB - + @@ -29,7 +29,7 @@ 0.6.0 would cover most use cases)

    Getting Started#

    Add to your library dependencies:

    libraryDependencies ++= Seq(
    "com.thesamet.scalapb" %%% "scalapb-runtime" % scalapb.compiler.Version.scalapbVersion,
    // The following needed only if you include scalapb/scalapb.proto:
    "com.thesamet.scalapb" %%% "scalapb-runtime" % scalapb.compiler.Version.scalapbVersion % "protobuf"
    )

    Demo#

    Example project: https://github.com/thesamet/scalapbjs-test

    Example with multi-project build: https://github.com/thesamet/sbt-protoc/tree/master/examples/scalajs-multiproject

    Live demo: http://thesamet.github.io/scalapbjs-test/

    - + diff --git a/docs/scalapbc/index.html b/docs/scalapbc/index.html index 9680cec10..b082f514f 100644 --- a/docs/scalapbc/index.html +++ b/docs/scalapbc/index.html @@ -10,7 +10,7 @@ ScalaPBC: ScalaPB's standalone compiler | ScalaPB - + @@ -37,7 +37,7 @@ GRPC descriptors that the generated ZIO code depends on.

    bin/scalapbc --plugin-artifact=com.thesamet.scalapb.zio-grpc:protoc-gen-zio:0.1.0:default,classifier=unix,ext=sh,type=jar -- e2e/src/main/protobuf/service.proto --zio_out=/tmp/out --scala_out=grpc:/tmp/out -Ie2e/src/main/protobuf -Ithird_party -Iprotobuf

    bin/scalapbc --plugin-artifact=io.grpc:grpc-java:

    Using ScalaPB as a proper protoc plugin#

    You may want to use ScalaPB code generator as a standard protoc plugin (rather than using scalapbc as a wrapper or through SBT).

    For Linux and Mac OS X, you can download a native executable version of the plugin for Scala from our release page:

    Those zip files contain native executables of the plugin for the respective operating system built using GraalVM. If you are using another operating system, or prefer to use a JVM based plugin implementation, you will find executable scripts for Windows and Unix-like operating systems in maven. These scripts require a JVM to run. The JVM needs to be available on the path, or through the JAVA_HOME environment variable.

    To generate code:

    protoc my.protos --plugin=/path/to/bin/protoc-gen-scala-0.11.11-unix.sh --scala_out=scala

    On Windows:

    protoc my.protos --plugin=protoc-gen-scala=/path/to/bin/protoc-gen-scala.bat --scala_out=scala

    For passing parameters to the plugin, see the section above.

    Note that the standalone plugin provided in scalapbc needs to be able to find a JVM in the path or through JAVA_HOME environment variable. If you encounter unexpected errors, try to execute the plugin directly from the command line, and the output printed may be useful for further debugging.

    The generated code depends on scalapb-runtime to compile. To get the code to work, add a dependency on scalapb-runtime to your project. The version of scalapb-runtime needs to match or be newer than the version of the plugin.

    - + diff --git a/docs/sealed-oneofs/index.html b/docs/sealed-oneofs/index.html index 1573ff008..8926cc0c7 100644 --- a/docs/sealed-oneofs/index.html +++ b/docs/sealed-oneofs/index.html @@ -10,7 +10,7 @@ Sealed oneofs | ScalaPB - + @@ -41,7 +41,7 @@
    case class Mul(left: Option[Expr], right: Option[Expr]) extends Expr with GeneratedMessage
    case class Programs(exprs: Seq[Option[Expr]]) extends GeneratedMessage
    - + diff --git a/docs/sparksql/index.html b/docs/sparksql/index.html index 13e359d7c..df1abd352 100644 --- a/docs/sparksql/index.html +++ b/docs/sparksql/index.html @@ -10,7 +10,7 @@ Using ScalaPB with Spark | ScalaPB - + @@ -18,7 +18,7 @@ - +
    @@ -42,7 +42,7 @@ APIs being used. If you use Spark 3.2.0, please stick to sparksql-scalapb 1.0.0-M1.

    Spark ships with an old version of Google's Protocol Buffers runtime that is not compatible with the current version. In addition, it comes with incompatible versions of scala-collection-compat and shapeless. Therefore, we need to shade these libraries. Add the following to your build.sbt:

    assemblyShadeRules in assembly := Seq(
    ShadeRule.rename("com.google.protobuf.**" -> "shadeproto.@1").inAll,
    ShadeRule.rename("scala.collection.compat.**" -> "shadecompat.@1").inAll,
    ShadeRule.rename("shapeless.**" -> "shadeshapeless.@1").inAll
    )

    See complete example of build.sbt.

    Using sparksql-scalapb#

    We assume you have a SparkSession assigned to the variable spark. In a standalone Scala program, this can be created with:

    import org.apache.spark.sql.SparkSession
    -
    val spark: SparkSession = SparkSession
    .builder()
    .appName("ScalaPB Demo")
    .master("local[2]")
    .getOrCreate()
    // spark: SparkSession = org.apache.spark.sql.SparkSession@f13cb83

    IMPORTANT: Ensure you do not import spark.implicits._ to avoid ambiguity between ScalaPB provided encoders and Spark's default encoders. You may want to import StringToColumn to convert $"col name" into a Column. Add an import scalapb.spark.Implicits to add ScalaPB's encoders for protocol buffers into the implicit search scope:

    import org.apache.spark.sql.{Dataset, DataFrame, functions => F}
    import spark.implicits.StringToColumn
    import scalapb.spark.ProtoSQL
    +
    val spark: SparkSession = SparkSession
    .builder()
    .appName("ScalaPB Demo")
    .master("local[2]")
    .getOrCreate()
    // spark: SparkSession = org.apache.spark.sql.SparkSession@b90929a

    IMPORTANT: Ensure you do not import spark.implicits._ to avoid ambiguity between ScalaPB provided encoders and Spark's default encoders. You may want to import StringToColumn to convert $"col name" into a Column. Add an import scalapb.spark.Implicits to add ScalaPB's encoders for protocol buffers into the implicit search scope:

    import org.apache.spark.sql.{Dataset, DataFrame, functions => F}
    import spark.implicits.StringToColumn
    import scalapb.spark.ProtoSQL
    import scalapb.spark.Implicits._

    The code snippets below use the Person message.

    We start by creating some test data:

    import scalapb.docs.person.Person
    import scalapb.docs.person.Person.{Address, AddressType}
    val testData = Seq(
    Person(name="John", age=32, addresses=Vector(
    Address(addressType=AddressType.HOME, street="Market", city="SF"))
    ),
    Person(name="Mike", age=29, addresses=Vector(
    Address(addressType=AddressType.WORK, street="Castro", city="MV"),
    Address(addressType=AddressType.HOME, street="Church", city="MV"))
    ),
    Person(name="Bart", age=27)
    )

    We can create a DataFrame from the test data:

    val df = ProtoSQL.createDataFrame(spark, testData)
    // df: DataFrame = [name: string, age: int ... 1 more field]
    df.printSchema()
    // root
    // |-- name: string (nullable = true)
    // |-- age: integer (nullable = true)
    // |-- addresses: array (nullable = false)
    // | |-- element: struct (containsNull = false)
    // | | |-- address_type: string (nullable = true)
    // | | |-- street: string (nullable = true)
    // | | |-- city: string (nullable = true)
    //
    df.show()
    // +----+---+--------------------+
    // |name|age| addresses|
    // +----+---+--------------------+
    // |John| 32|[{HOME, Market, SF}]|
    // |Mike| 29|[{WORK, Castro, M...|
    // |Bart| 27| []|
    // +----+---+--------------------+
    //

    and then process it as any other Dataframe in Spark:

    df.select($"name", F.size($"addresses").alias("address_count")).show()
    // +----+-------------+
    // |name|address_count|
    // +----+-------------+
    // |John| 1|
    // |Mike| 2|
    // |Bart| 0|
    // +----+-------------+
    //
    val nameAndAddress = df.select($"name", $"addresses".getItem(0).alias("firstAddress"))
    // nameAndAddress: DataFrame = [name: string, firstAddress: struct<address_type: string, street: string ... 1 more field>]
    @@ -51,7 +51,7 @@
    val protoRDD: RDD[Person] = spark.sparkContext.parallelize(testData)
    val protoDF: DataFrame = ProtoSQL.protoToDataFrame(spark, protoRDD)
    val protoDS: Dataset[Person] = spark.createDataset(protoRDD)

    UDFs#

    If you need to write a UDF that returns a message, it would not pick up our encoder and you may get a runtime failure. To work around this, sparksql-scalapb provides ProtoSQL.udf to create UDFs. For example, if you need to parse a binary column into a proto:

    val binaryDF = protosBinary.toDF("value")
    // binaryDF: DataFrame = [value: binary]
    -
    val parsePersons = ProtoSQL.udf { bytes: Array[Byte] => Person.parseFrom(bytes) }
    // parsePersons: org.apache.spark.sql.Column => org.apache.spark.sql.Column = scalapb.spark.Udfs$$Lambda$13431/0x00000001038b5040@327161ad
    +
    val parsePersons = ProtoSQL.udf { bytes: Array[Byte] => Person.parseFrom(bytes) }
    // parsePersons: org.apache.spark.sql.Column => org.apache.spark.sql.Column = scalapb.spark.Udfs$$Lambda$13418/0x00000001038b5840@3694829f
    binaryDF.withColumn("person", parsePersons($"value"))
    // res7: DataFrame = [value: binary, person: struct<name: string, age: int ... 1 more field>]

    Primitive wrappers#

    In ProtoSQL 0.9.x and 0.10.x, primitive wrappers are represented in Spark as structs with a single field named value. A better representation in Spark would be a nullable field of the primitive type. The better representation will be the @@ -61,7 +61,7 @@ scalapb.spark.ProtoSQL.implicits._

    See example in WrappersSpec.

    Datasets and <none> is not a term#

    You will see this error if for some reason Spark's Encoders are being picked up instead of the ones provided by sparksql-scalapb. Please ensure you are not importing spark.implicits._. See instructions above for imports.

    Example#

    Check out a complete example here.

    - + @@ -69,6 +69,6 @@ - + \ No newline at end of file diff --git a/docs/third-party-protos/index.html b/docs/third-party-protos/index.html index f08d1a94e..6956fc674 100644 --- a/docs/third-party-protos/index.html +++ b/docs/third-party-protos/index.html @@ -10,7 +10,7 @@ Using third-party protos | ScalaPB - + @@ -26,7 +26,7 @@
    Compile / PB.targets := Seq(
    scalapb.gen() -> (Compile / sourceManaged).value
    )
    )
    // myProject contains its own protos which rely on protos from externalProtos
    lazy val myProject = (project in file("my-project"))
    .dependsOn(externalProtos)
    .settings(
    Compile / PB.targets := Seq(
    scalapb.gen() -> (Compile / sourceManaged).value
    )
    )

    See full example here.

    - + diff --git a/docs/transformations/index.html b/docs/transformations/index.html index 99183c81c..b09372d5f 100644 --- a/docs/transformations/index.html +++ b/docs/transformations/index.html @@ -10,7 +10,7 @@ Transformations | ScalaPB - + @@ -44,7 +44,7 @@
    import "scalapb/scalapb.proto";
    option (scalapb.options) = {
    scope: PACKAGE
    field_transformations : [
    {
    when : {
    type: TYPE_MESSAGE
    type_name: ".google.protobuf.Timestamp"
    }
    set : {[scalapb.field] {type : 'com.myexample.MyType' }}
    }
    ]
    };
    note

    Note the . (dot) prefix in the type_name field above. It is needed as explained here. In this example we assume the user's package is not named google or google.protobuf since then type_name could be relative and would not match.

    Now, we need to make sure there is an implicit typemapper converting between google.protobuf.timestamp.Timestamp and com.myexample.MyType. The typemapper can be defined in the companion object of MyType as shown in custom types.

    - + diff --git a/docs/upgrading/index.html b/docs/upgrading/index.html index 93b5e2fae..9f622b014 100644 --- a/docs/upgrading/index.html +++ b/docs/upgrading/index.html @@ -10,7 +10,7 @@ Upgrade guide | ScalaPB - + @@ -34,7 +34,7 @@ ScalaPB SBT Settings.

    If you are using files like scalapb.proto and Google's well-known proto change the library dependency from:

    "com.trueaccord.scalapb" %% "scalapb-runtime" % "0.11.11" % PB.protobufConfig

    to:

    "com.thesamet.scalapb" %% "scalapb-runtime" % scalapb.compiler.Version.scalapbVersion % "protobuf"
    - + diff --git a/docs/user_defined_options/index.html b/docs/user_defined_options/index.html index 11fc3d21e..351f11ed2 100644 --- a/docs/user_defined_options/index.html +++ b/docs/user_defined_options/index.html @@ -10,7 +10,7 @@ Defining custom options | ScalaPB - + @@ -45,7 +45,7 @@
    assert(use_opts.OneMessage.scalaDescriptor.getOptions.extension(
    my_opts.CustomOptionsMyOptsProto.myMessageOption).get ==
    my_opts.MyMessageOption().update(_.priority := 17))
    assert(numberField.getOptions.extension(
    my_opts.Wrapper.tags) == Seq(
    my_opts.Tag(name = Some("tag1")),
    my_opts.Tag(name = Some("tag2"))))

    Example code#

    The full source code of this example is available below:

    - + diff --git a/docs/validation/index.html b/docs/validation/index.html index 679ca79b0..2a172d7ce 100644 --- a/docs/validation/index.html +++ b/docs/validation/index.html @@ -10,7 +10,7 @@ Validating Protobufs | ScalaPB - + @@ -67,7 +67,7 @@
    field_transformations : [ {
    when : {options: {[validate.rules] {int32 : {gt : 1}}}} // <-- 1 can be replaced with any number
    set : {[scalapb.field] {type : "Int Refined Greater[$(options.[validate.rules].int32.gt)]"}}
    match_type : PRESENCE
    } ]
    };
    message Test {
    int32 gt_test = 1 [ (validate.rules).int32 = {gt : 5} ]; // transformed to: Int Refined Greater[5]
    }

    For this to work, a typemapper for refined types needs to be either put in a package object in the same package where the code is generated, or be manually imported through import options.

    The typemapper used in scalapb-validate tests is here.

    Additional resources:

    - + diff --git a/docs/writing-plugins/index.html b/docs/writing-plugins/index.html index c3ae2035c..7bec65957 100644 --- a/docs/writing-plugins/index.html +++ b/docs/writing-plugins/index.html @@ -10,7 +10,7 @@ Writing protoc plugins in Scala | ScalaPB - + @@ -53,7 +53,7 @@ library with a % "protobuf" scope. To use:

    import "myplugin.proto";
    message MyMessage {
    option (myplugin.myopts).my_option = false;
    }

    Publishing the plugin#

    The project can be published to Maven using the “publish” command. We recommend to use the excellent sbt-ci-release plugin to automatically build a snapshot on each commit, and a full release when pushing a git tag.

    SBT users of your code generators will add your plugin to the build by adding it to their project/plugins.sbt like this:

    Compile / PB.targets := Seq(
    scalapb.gen() -> (Compile / sourceManaged).value / "scalapb",
    com.myplugin.gen() -> (Compile / sourceManaged).value / "scalapb"
    )

    The template also publishes artifacts with names ending with unix.sh and windows.bat. These are executable jars for Unix and Windows systems that contain all the classes needed to run your code generator (except for a JVM, which is expected to be in JAVA_HOME or in the PATH). This is useful if your users need to use your plugin directly with protoc, or with a build tool such as maven.

    Secondary outputs#

    note

    Secondary outputs were introduced in protoc-bridge 0.9.0 and are supported by sbt-protoc 1.0.0 and onwards.

    Secondary outputs provide a simple way for protoc plugins to pass information for other protoc plugins running after them in the same protoc invocation. The information is passed through files that are created in a temporary directory. The absolute path of that temporary directory is provided to all protoc plugins. Plugins may create new files in that directory for subsequent plugins to consume.

    Conventions:

    • Names of secondary output files should be in kebab-case, and should clearly identify the plugin producing them. For example scalapb-validate-preprocessor.
    • The content of the file should be a serialized google.protobuf.Any message that packs the arbitrary payload the plugin wants to publish.

    Determining the secondary output directory location#

    JVM-based plugins that are executed in the same JVM that spawns protoc (like the ones described on this page), receive the location of the secondary output directory via the CodeGeneratorRequest. protoc-bridge appends to the request an unknown field carrying a message called ExtraEnv which contains the path to the secondary output directory.

    Other plugins that are invoked directly by protoc can find the secondary output directory by inspecting the SCALAPB_SECONDARY_OUTPUT_DIR environment variable.

    protoc-bridge takes care of creating the temporary directory and setting up the environment variable before invoking protoc. If protoc is run manually (for example, through the CLI), it is the user's responsibility to create a directory for secondary outputs and pass it as an environment variable to protoc. It's worth noting that ScalaPB only looks for the secondary output directory if a preprocessor is requested, and therefore for the most part users do not need to worry about secondary output directories.

    In ScalaPB's code base, SecondaryOutputProvider provides a method to find the secondary output directory as described above.

    Preprocessors#

    Preprocessors are protoc plugins that provide secondary outputs that are consumed by ScalaPB. ScalaPB expects the secondary output to be a google.protobuf.Any that encodes a PreprocessorOutput. The message contains a map between proto file names (as given by FileDescriptor#getFullName()) to additional ScalaPbOptions that are merged with the files options. By appending to aux_field_options, a preprocessor can, for example, impact the generated types of ScalaPB fields.

    • ScalaPB applies the provided options to a proto file only if the original file lists the preprocessor secondary output filename in a preprocessors file-level option. That option can be inherited from a package-scoped option.
    • To exclude a specific file from being preprocessed (if it would be otherwise impacted by a package-scoped option), add a -NAME entry to the list of preprocessors where NAME is the name of the preprocessor's secondary output.
    • In case of multiple preprocessors, options of later preprocessors override those of earlier preprocessors. Options in the file are merged over the preprocessor's options. When merging, repeated fields get concatenated.
    • Preprocessor plugins need to be invoked (in PB.targets or protoc's command line) before ScalaPB, so when ScalaPB runs their output is available.
    • Plugins that depend on ScalaPB (such as scalapb-validate) rely on DescriptorImplicits which consume the preprocessor output and therefore also see the updated options.

    Summary#

    If you followed this guide all the way to here, then congratulations for creating your first protoc plugin in Scala!

    If you have any questions, feel free to reach out to us on Gitter or Github.

    Did you write an interesting protoc plugin? Let us know on our gitter channel or our Google group and we'd love to mention it here!

    - + diff --git a/index.html b/index.html index 9bb1ff77b..94282e8d5 100644 --- a/index.html +++ b/index.html @@ -10,7 +10,7 @@ ScalaPB: Protocol Buffer Compiler for Scala | ScalaPB - + @@ -20,7 +20,7 @@

    Your Docusaurus site did not load properly.

    A very common reason is a wrong site baseUrl configuration.

    Current configured baseUrl = / (default value)

    We suggest trying baseUrl =

    Protocol Buffer Compiler for Scala

    Easy to Use

    ScalaPB translates Protocol Buffers to Scala case classes. The generated API is easy to use!

    Supports proto2 and proto3

    ScalaPB is built as a protoc plugin and has perfect compatibility with the protobuf language specification.

    Nested updates

    Updating immutable nested structure is made easy by an optional lenses support. Learn more.

    Interoperate with Java

    Scala Protocol Buffers can be converted to Java and vice versa. Scala and Java protobufs can co-exist in the same project to make it easier to gradually migrate, or interact with legacy Java APIs.

    Scala.js support

    ScalaPB fully supports Scala.js so you can write Scala programs that use your domain-specific Protocol Buffers in the browser! Learn more.

    gRPC

    Build gRPC servers and clients with ScalaPB. ScalaPB ships with its own wrapper around the official gRPC Java implementation. There are gRPC libraries for ZIO, Cats Effect and Akka.

    - + diff --git a/runtime~main.5122a3b5.js b/runtime~main.fd0676eb.js similarity index 65% rename from runtime~main.5122a3b5.js rename to runtime~main.fd0676eb.js index d833eb8eb..d350f6846 100644 --- a/runtime~main.5122a3b5.js +++ b/runtime~main.fd0676eb.js @@ -1 +1 @@ -!function(e){function a(a){for(var f,n,d=a[0],o=a[1],b=a[2],u=0,l=[];u