Skip to content

Commit

Permalink
Exposes language parameter in TransformPiiText (#2996)
Browse files Browse the repository at this point in the history
  • Loading branch information
nickzelei authored Nov 26, 2024
1 parent 58a4813 commit 4cab8af
Show file tree
Hide file tree
Showing 18 changed files with 888 additions and 733 deletions.
1,163 changes: 589 additions & 574 deletions backend/gen/go/protos/mgmt/v1alpha1/transformer.pb.go

Large diffs are not rendered by default.

15 changes: 12 additions & 3 deletions backend/internal/cmds/mgmt/serve/connect/cmd.go
Original file line number Diff line number Diff line change
Expand Up @@ -560,9 +560,10 @@ func serve(ctx context.Context) error {
)

anonymizationService := v1alpha1_anonymizationservice.New(&v1alpha1_anonymizationservice.Config{
IsPresidioEnabled: ncloudlicense.IsValid(),
IsAuthEnabled: isAuthEnabled,
IsNeosyncCloud: ncloudlicense.IsValid(),
IsPresidioEnabled: ncloudlicense.IsValid(),
PresidioDefaultLanguage: getPresidioDefaultLanguage(),
IsAuthEnabled: isAuthEnabled,
IsNeosyncCloud: ncloudlicense.IsValid(),
}, anonymizerMeter, useraccountService, presAnalyzeClient, presAnonClient, db)
api.Handle(
mgmtv1alpha1connect.NewAnonymizationServiceHandler(
Expand Down Expand Up @@ -632,6 +633,14 @@ func serve(ctx context.Context) error {
return nil
}

// getPresidioDefaultLanguage looks up the PRESIDIO_DEFAULT_LANGUAGE setting
// through viper. It returns nil when the setting is empty, and otherwise a
// pointer to the configured value.
func getPresidioDefaultLanguage() *string {
	if configured := viper.GetString("PRESIDIO_DEFAULT_LANGUAGE"); configured != "" {
		return &configured
	}
	return nil
}

func getPromClientFromEnvironment() (promapi.Client, error) {
roundTripper := promapi.DefaultRoundTripper
promApiKey := getPromApiKey()
Expand Down
5 changes: 5 additions & 0 deletions backend/protos/mgmt/v1alpha1/transformer.proto
Original file line number Diff line number Diff line change
Expand Up @@ -248,6 +248,11 @@ message TransformPiiText {

// Exact match of PII phrases identified by the analysis engine.
repeated string allowed_phrases = 5;

// Two characters for the desired language in ISO_639-1 format.
// If not provided, will use a default language if specified on the server.
// Depending on the server configuration, may attempt to autodetect from input.
optional string language = 6;
}

message PiiDenyRecognizer {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func (s *Service) AnonymizeMany(
jsonanonymizer.WithTransformerMappings(req.Msg.TransformerMappings),
jsonanonymizer.WithDefaultTransformers(req.Msg.DefaultTransformers),
jsonanonymizer.WithHaltOnFailure(req.Msg.HaltOnFailure),
jsonanonymizer.WithConditionalAnonymizeConfig(s.cfg.IsPresidioEnabled, s.analyze, s.anonymize),
jsonanonymizer.WithConditionalAnonymizeConfig(s.cfg.IsPresidioEnabled, s.analyze, s.anonymize, s.cfg.PresidioDefaultLanguage),
)
if err != nil {
return nil, err
Expand Down Expand Up @@ -165,7 +165,7 @@ func (s *Service) AnonymizeSingle(
anonymizer, err := jsonanonymizer.NewAnonymizer(
jsonanonymizer.WithTransformerMappings(req.Msg.TransformerMappings),
jsonanonymizer.WithDefaultTransformers(req.Msg.DefaultTransformers),
jsonanonymizer.WithConditionalAnonymizeConfig(s.cfg.IsPresidioEnabled, s.analyze, s.anonymize),
jsonanonymizer.WithConditionalAnonymizeConfig(s.cfg.IsPresidioEnabled, s.analyze, s.anonymize, s.cfg.PresidioDefaultLanguage),
)
if err != nil {
return nil, err
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,9 +17,10 @@ type Service struct {
}

type Config struct {
IsAuthEnabled bool
IsPresidioEnabled bool
IsNeosyncCloud bool
IsAuthEnabled bool
IsPresidioEnabled bool
PresidioDefaultLanguage *string
IsNeosyncCloud bool
}

func New(
Expand Down
22 changes: 22 additions & 0 deletions docs/openapi/mgmt/v1alpha1/anonymization.openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1183,6 +1183,21 @@ components:
additionalProperties: false
mgmt.v1alpha1.TransformPiiText:
type: object
allOf:
- anyOf:
- required:
- language
- not:
anyOf:
- required:
- language
anyOf:
- required:
- language
- not:
anyOf:
- required:
- language
properties:
scoreThreshold:
type: number
Expand Down Expand Up @@ -1215,6 +1230,13 @@ components:
type: string
title: allowed_phrases
description: Exact match of PII phrases identified by the analysis engine.
language:
type: string
title: language
description: |-
Two characters for the desired language in ISO_639-1 format.
If not provided, will use a default language if specified on the server.
Depending on the server configuration, may attempt to autodetect from input.
title: TransformPiiText
additionalProperties: false
description: NeosyncCloud/Enterprise only transformer for anonymizing PII Text
Expand Down
22 changes: 22 additions & 0 deletions docs/openapi/mgmt/v1alpha1/job.openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5056,6 +5056,21 @@ components:
additionalProperties: false
mgmt.v1alpha1.TransformPiiText:
type: object
allOf:
- anyOf:
- required:
- language
- not:
anyOf:
- required:
- language
anyOf:
- required:
- language
- not:
anyOf:
- required:
- language
properties:
scoreThreshold:
type: number
Expand Down Expand Up @@ -5088,6 +5103,13 @@ components:
type: string
title: allowed_phrases
description: Exact match of PII phrases identified by the analysis engine.
language:
type: string
title: language
description: |-
Two characters for the desired language in ISO_639-1 format.
If not provided, will use a default language if specified on the server.
Depending on the server configuration, may attempt to autodetect from input.
title: TransformPiiText
additionalProperties: false
description: NeosyncCloud/Enterprise only transformer for anonymizing PII Text
Expand Down
22 changes: 22 additions & 0 deletions docs/openapi/mgmt/v1alpha1/transformer.openapi.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -1703,6 +1703,21 @@ components:
additionalProperties: false
mgmt.v1alpha1.TransformPiiText:
type: object
allOf:
- anyOf:
- required:
- language
- not:
anyOf:
- required:
- language
anyOf:
- required:
- language
- not:
anyOf:
- required:
- language
properties:
scoreThreshold:
type: number
Expand Down Expand Up @@ -1735,6 +1750,13 @@ components:
type: string
title: allowed_phrases
description: Exact match of PII phrases identified by the analysis engine.
language:
type: string
title: language
description: |-
Two characters for the desired language in ISO_639-1 format.
If not provided, will use a default language if specified on the server.
Depending on the server configuration, may attempt to autodetect from input.
title: TransformPiiText
additionalProperties: false
description: NeosyncCloud/Enterprise only transformer for anonymizing PII Text
Expand Down
22 changes: 22 additions & 0 deletions docs/openapi/neosync.mgmt.v1alpha1.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -5155,6 +5155,21 @@ components:
additionalProperties: false
mgmt.v1alpha1.TransformPiiText:
type: object
allOf:
- anyOf:
- required:
- language
- not:
anyOf:
- required:
- language
anyOf:
- required:
- language
- not:
anyOf:
- required:
- language
properties:
scoreThreshold:
type: number
Expand Down Expand Up @@ -5190,6 +5205,13 @@ components:
type: string
title: allowed_phrases
description: Exact match of PII phrases identified by the analysis engine.
language:
type: string
title: language
description: |-
Two characters for the desired language in ISO_639-1 format.
If not provided, will use a default language if specified on the server.
Depending on the server configuration, may attempt to autodetect from input.
title: TransformPiiText
additionalProperties: false
description: NeosyncCloud/Enterprise only transformer for anonymizing PII Text
Expand Down
2 changes: 1 addition & 1 deletion docs/protos/mgmt/v1alpha1/transformer.proto.mdx
Original file line number Diff line number Diff line change
Expand Up @@ -282,7 +282,7 @@ _**package** mgmt.v1alpha1_


### `TransformPiiText`
<ProtoMessage key={66} message={{"name":"TransformPiiText","longName":"TransformPiiText","fullName":"mgmt.v1alpha1.TransformPiiText","description":"NeosyncCloud/Enterprise only transformer for anonymizing PII Text","hasExtensions":false,"hasFields":true,"hasOneofs":false,"extensions":[],"fields":[{"name":"score_threshold","description":"Minimal detection score for determining PII. 0.0-1.0","label":"","type":"float","longType":"float","fullType":"float","ismap":false,"isoneof":false,"oneofdecl":"","defaultValue":""},{"name":"default_anonymizer","description":"The default anonmyization config used for all instances of detected PII.","label":"","type":"PiiAnonymizer","longType":"PiiAnonymizer","fullType":"mgmt.v1alpha1.PiiAnonymizer","ismap":false,"isoneof":false,"oneofdecl":"","defaultValue":"","typeLink":"/api/mgmt/v1alpha1/transformer.proto#piianonymizer"},{"name":"deny_recognizers","description":"Configure deny lists where each word is treated as PII.","label":"repeated","type":"PiiDenyRecognizer","longType":"PiiDenyRecognizer","fullType":"mgmt.v1alpha1.PiiDenyRecognizer","ismap":false,"isoneof":false,"oneofdecl":"","defaultValue":"","typeLink":"/api/mgmt/v1alpha1/transformer.proto#piidenyrecognizer"},{"name":"allowed_entities","description":"Configure a list of entities to be used for PII analysis. If not provided or empty, all entities are considiered\nIf this is specified, any ad-hoc, or deny_recognizers entity names must also be provided.\nTo see available builtin entities, call the GetPiiTextEntities() RPC method to see what is available for your account.","label":"repeated","type":"string","longType":"string","fullType":"string","ismap":false,"isoneof":false,"oneofdecl":"","defaultValue":""},{"name":"allowed_phrases","description":"Exact match of PII phrases identified by the analysis engine.","label":"repeated","type":"string","longType":"string","fullType":"string","ismap":false,"isoneof":false,"oneofdecl":"","defaultValue":""}]}} />
<ProtoMessage key={66} message={{"name":"TransformPiiText","longName":"TransformPiiText","fullName":"mgmt.v1alpha1.TransformPiiText","description":"NeosyncCloud/Enterprise only transformer for anonymizing PII Text","hasExtensions":false,"hasFields":true,"hasOneofs":true,"extensions":[],"fields":[{"name":"score_threshold","description":"Minimal detection score for determining PII. 0.0-1.0","label":"","type":"float","longType":"float","fullType":"float","ismap":false,"isoneof":false,"oneofdecl":"","defaultValue":""},{"name":"default_anonymizer","description":"The default anonmyization config used for all instances of detected PII.","label":"","type":"PiiAnonymizer","longType":"PiiAnonymizer","fullType":"mgmt.v1alpha1.PiiAnonymizer","ismap":false,"isoneof":false,"oneofdecl":"","defaultValue":"","typeLink":"/api/mgmt/v1alpha1/transformer.proto#piianonymizer"},{"name":"deny_recognizers","description":"Configure deny lists where each word is treated as PII.","label":"repeated","type":"PiiDenyRecognizer","longType":"PiiDenyRecognizer","fullType":"mgmt.v1alpha1.PiiDenyRecognizer","ismap":false,"isoneof":false,"oneofdecl":"","defaultValue":"","typeLink":"/api/mgmt/v1alpha1/transformer.proto#piidenyrecognizer"},{"name":"allowed_entities","description":"Configure a list of entities to be used for PII analysis. 
If not provided or empty, all entities are considiered\nIf this is specified, any ad-hoc, or deny_recognizers entity names must also be provided.\nTo see available builtin entities, call the GetPiiTextEntities() RPC method to see what is available for your account.","label":"repeated","type":"string","longType":"string","fullType":"string","ismap":false,"isoneof":false,"oneofdecl":"","defaultValue":""},{"name":"allowed_phrases","description":"Exact match of PII phrases identified by the analysis engine.","label":"repeated","type":"string","longType":"string","fullType":"string","ismap":false,"isoneof":false,"oneofdecl":"","defaultValue":""},{"name":"language","description":"Two characters for the desired language in ISO_639-1 format.\nIf not provided, will use a default language if specified on the server.\nDepending on the server configuration, may attempt to autodetect from input.","label":"optional","type":"string","longType":"string","fullType":"string","ismap":false,"isoneof":true,"oneofdecl":"_language","defaultValue":""}]}} />


### `TransformString`
Expand Down
14 changes: 13 additions & 1 deletion docs/protos/proto_docs.json
Original file line number Diff line number Diff line change
Expand Up @@ -2180,7 +2180,7 @@
"description": "NeosyncCloud/Enterprise only transformer for anonymizing PII Text",
"hasExtensions": false,
"hasFields": true,
"hasOneofs": false,
"hasOneofs": true,
"extensions": [],
"fields": [
{
Expand Down Expand Up @@ -2242,6 +2242,18 @@
"isoneof": false,
"oneofdecl": "",
"defaultValue": ""
},
{
"name": "language",
"description": "Two characters for the desired language in ISO_639-1 format.\nIf not provided, will use a default language if specified on the server.\nDepending on the server configuration, may attempt to autodetect from input.",
"label": "optional",
"type": "string",
"longType": "string",
"fullType": "string",
"ismap": false,
"isoneof": true,
"oneofdecl": "_language",
"defaultValue": ""
}
]
},
Expand Down
10 changes: 10 additions & 0 deletions frontend/packages/sdk/src/client/mgmt/v1alpha1/transformer_pb.ts
Original file line number Diff line number Diff line change
Expand Up @@ -1765,6 +1765,15 @@ export class TransformPiiText extends Message<TransformPiiText> {
*/
allowedPhrases: string[] = [];

/**
* Two characters for the desired language in ISO_639-1 format.
* If not provided, will use a default language if specified on the server.
* Depending on the server configuration, may attempt to autodetect from input.
*
* @generated from field: optional string language = 6;
*/
language?: string;

constructor(data?: PartialMessage<TransformPiiText>) {
super();
proto3.util.initPartial(data, this);
Expand All @@ -1778,6 +1787,7 @@ export class TransformPiiText extends Message<TransformPiiText> {
{ no: 3, name: "deny_recognizers", kind: "message", T: PiiDenyRecognizer, repeated: true },
{ no: 4, name: "allowed_entities", kind: "scalar", T: 9 /* ScalarType.STRING */, repeated: true },
{ no: 5, name: "allowed_phrases", kind: "scalar", T: 9 /* ScalarType.STRING */, repeated: true },
{ no: 6, name: "language", kind: "scalar", T: 9 /* ScalarType.STRING */, opt: true },
]);

static fromBinary(bytes: Uint8Array, options?: Partial<BinaryReadOptions>): TransformPiiText {
Expand Down
2 changes: 1 addition & 1 deletion internal/ee/transformers/functions/functions.go
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ func TransformPiiText(
allowedEntities := config.GetAllowedEntities()
analyzeResp, err := analyzeClient.PostAnalyzeWithResponse(ctx, presidioapi.AnalyzeRequest{
Text: value,
Language: supportedLanguage,
Language: config.GetLanguage(),
ScoreThreshold: &threshold,
AdHocRecognizers: &adhocRecognizers,
Entities: &allowedEntities,
Expand Down
16 changes: 9 additions & 7 deletions internal/json-anonymizer/json-anonymizer.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,8 +30,9 @@ type JsonAnonymizer struct {
}

type anonymizeConfig struct {
analyze presidioapi.AnalyzeInterface
anonymize presidioapi.AnonymizeInterface
analyze presidioapi.AnalyzeInterface
anonymize presidioapi.AnonymizeInterface
defaultLanguage *string
}

// Option is a functional option for configuring the Anonymizer
Expand Down Expand Up @@ -73,12 +74,13 @@ func NewAnonymizer(opts ...Option) (*JsonAnonymizer, error) {
}

// WithConditionalAnonymizeConfig sets the analyze and anonymize clients, along with the optional default language, for use by the presidio transformers only if isEnabled is true and both clients are non-nil
func WithConditionalAnonymizeConfig(isEnabled bool, analyze presidioapi.AnalyzeInterface, anonymize presidioapi.AnonymizeInterface) Option {
func WithConditionalAnonymizeConfig(isEnabled bool, analyze presidioapi.AnalyzeInterface, anonymize presidioapi.AnonymizeInterface, defaultLanguage *string) Option {
return func(ja *JsonAnonymizer) {
if isEnabled && analyze != nil && anonymize != nil {
ja.anonymizeConfig = &anonymizeConfig{
analyze: analyze,
anonymize: anonymize,
analyze: analyze,
anonymize: anonymize,
defaultLanguage: defaultLanguage,
}
}
}
Expand Down Expand Up @@ -330,7 +332,7 @@ func initTransformerExecutors(
executors := []*transformer.TransformerExecutor{}
execOpts := []transformer.TransformerExecutorOption{}
if anonymizeConfig != nil && anonymizeConfig.analyze != nil && anonymizeConfig.anonymize != nil {
execOpts = append(execOpts, transformer.WithTransformPiiTextConfig(anonymizeConfig.analyze, anonymizeConfig.anonymize))
execOpts = append(execOpts, transformer.WithTransformPiiTextConfig(anonymizeConfig.analyze, anonymizeConfig.anonymize, anonymizeConfig.defaultLanguage))
}

for _, mapping := range transformerMappings {
Expand All @@ -356,7 +358,7 @@ func initDefaultTransformerExecutors(
) (*DefaultExecutors, error) {
execOpts := []transformer.TransformerExecutorOption{}
if anonymizeConfig != nil && anonymizeConfig.analyze != nil && anonymizeConfig.anonymize != nil {
execOpts = append(execOpts, transformer.WithTransformPiiTextConfig(anonymizeConfig.analyze, anonymizeConfig.anonymize))
execOpts = append(execOpts, transformer.WithTransformPiiTextConfig(anonymizeConfig.analyze, anonymizeConfig.anonymize, anonymizeConfig.defaultLanguage))
}

var stringExecutor, numberExecutor, booleanExecutor *transformer.TransformerExecutor
Expand Down
264 changes: 132 additions & 132 deletions python/src/neosync/mgmt/v1alpha1/transformer_pb2.py

Large diffs are not rendered by default.

Loading

0 comments on commit 4cab8af

Please sign in to comment.