Skip to content

Commit

Permalink
Merge pull request #4 from alexdeploy/moderation
Browse files Browse the repository at this point in the history
Added simple prompt moderation for violent content.
  • Loading branch information
alexdeploy authored May 10, 2023
2 parents 6693a5a + 3c2a29c commit 84d52e5
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 3 deletions.
47 changes: 47 additions & 0 deletions client/__tests__/Security.test.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
const { describe, it, expect } = require('@jest/globals');
const { moderationCheck } = require('../src/utils/security');

describe('Security check:', () => {

  /**
   * Builds a brand-new moderation response for every test so that no test can
   * leak state into another one (the suite must pass in any execution order).
   * @param {object} categoryOverrides category flags to override; every category defaults to false.
   * @returns {object} an object shaped like the HTTP response, with the moderation payload under `data`.
   */
  const buildResponse = (categoryOverrides = {}) => ({
    data: {
      "id": "modr-XXXXX",
      "model": "text-moderation-001",
      "results": [
        {
          "categories": Object.assign({
            "hate": false,
            "hate/threatening": false,
            "self-harm": false,
            "sexual": false,
            "sexual/minors": false,
            "violence": false,
            "violence/graphic": false
          }, categoryOverrides),
          "category_scores": {
            "hate": 0.18805529177188873,
            "hate/threatening": 0.0001250059431185946,
            "self-harm": 0.0003706029092427343,
            "sexual": 0.0008735615410842001,
            "sexual/minors": 0.0007470346172340214,
            "violence": 0.0041268812492489815,
            "violence/graphic": 0.00023186142789199948
          },
          "flagged": false
        }
      ]
    }
  });

  it('should return true only if all categories are false', async () => {
    const response = buildResponse();

    const result = await moderationCheck(response.data);
    expect(result).toBe(true);
  });

  it('should return false if any category is true', async () => {
    // Only the category flag is flipped; `flagged` stays false on purpose so the
    // test pins the per-category check.
    const response = buildResponse({ "hate": true });

    const result = await moderationCheck(response.data);
    expect(result).toBe(false);
  });
});
17 changes: 16 additions & 1 deletion client/src/events/messageCreate.js
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
const { chat } = require('../utils/openai');
const { chat, moderation } = require('../utils/openai');
const { moderationCheck } = require('../utils/security');

module.exports = {
name: 'messageCreate',
Expand Down Expand Up @@ -41,6 +42,20 @@ module.exports = {
* @see OpenAI Safety best Practices: https://platform.openai.com/docs/guides/safety-best-practices
*/


/**
* MODERATE
*/
const classification = await moderation(prompt);

const moderationChecked = await moderationCheck(classification.data);

// If the message violates the Content Policy, return a warning message.
if(!moderationChecked){
await interactionReply.edit("Your message violates OpenAI's Content Policy. Please, try again.");
return;
}

// Get the response from the chatGPT-3
const response = await chat(prompt);

Expand Down
25 changes: 23 additions & 2 deletions client/src/utils/openai.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,9 +27,15 @@ const model = {
BABBAGE: "",
CURIE: "curie",
DAVINCI: "text-davinci-003",
GPT3_TURBO: "gpt-3.5-turbo"
GPT3_TURBO: "gpt-3.5-turbo",
MODERATION: {
STABLE: "text-moderation-stable",
LATEST: "text-moderation-latest"
}
};



const role = {
USER: "user",
SYSTEM: "system",
Expand Down Expand Up @@ -117,7 +123,22 @@ const createChatCompletion = async (prompt) => {
}
}

/**
 * CREATE MODERATION
 * * Asks the moderation endpoint to classify whether a text violates OpenAI's Content Policy.
 * @see Documentation https://platform.openai.com/docs/api-reference/moderations/create
 * @param {*} prompt text to classify; it is forwarded as the request `input`.
 * @returns the response resolved by `openai.createModeration`, passed through unchanged.
 */
const createModeration = async (prompt) => {
  // Request payload: always the "latest" moderation model plus the caller's text.
  const request = {
    model: model.MODERATION.LATEST,
    input: prompt,
  };

  return await openai.createModeration(request);
};

module.exports = {
send: createCompletion,
chat: createChatCompletion
chat: createChatCompletion,
moderation: createModeration
}
24 changes: 24 additions & 0 deletions client/src/utils/security.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
/**
 * MODERATION RESPONSE CHECK
 * * Checks for any content policy violations in the response of OpenAI's Content Policy moderation.
 * Every entry of `moderation.results` is inspected. An entry counts as a violation when its
 * `flagged` field is `true` or when any value of its `categories` object is `true`.
 * @param {*} moderation is the response body of OpenAI's Content Policy moderation (an object with a `results` array).
 * @see Moderation https://platform.openai.com/docs/guides/moderation/moderation
 * @returns {Promise<boolean>} true if the moderation is safe, false if it violates the Content Policy.
 * @throws {TypeError} if `results` is missing or empty, or an inspected entry has no `categories` object.
 */

const moderationCheck = async (moderation) => {

  const results = moderation == null ? undefined : moderation.results;

  // A response without results cannot prove the content is safe, so refuse to answer.
  if (!Array.isArray(results) || results.length === 0) {
    throw new TypeError('moderationCheck: expected a moderation response with a non-empty "results" array.');
  }

  // Tells whether a single moderation result reports a violation.
  const violatesPolicy = (result) => {
    const categories = result == null ? undefined : result.categories;

    if (categories === null || typeof categories !== 'object') {
      throw new TypeError('moderationCheck: moderation result is missing its "categories" object.');
    }

    return result.flagged === true
      || Object.values(categories).some((value) => value === true);
  };

  return !results.some(violatesPolicy);
};

module.exports = {
moderationCheck
}

0 comments on commit 84d52e5

Please sign in to comment.