Skip to content

Commit

Permalink
Update SharpVector.OpenAI to 2.0.0 with save/load functionality support
Browse files Browse the repository at this point in the history
  • Loading branch information
crpietschmann committed Feb 23, 2025
1 parent 21ebf9d commit dbc8776
Show file tree
Hide file tree
Showing 4 changed files with 27 additions and 19 deletions.
4 changes: 2 additions & 2 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,11 +5,11 @@ All notable changes to this project will be documented in this file.
The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.1.0/),
and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).

## v2.0.0 (In Progress)
## v2.0.0

Added:

- Add data persistence capability to save/load from a file or to/from a `Stream`
- Add data persistence capability to save/load from a file or to/from a `Stream` (both SharpVector and SharpVector.OpenAI)
- Add Chinese language/character support

Breaking Change:
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -3,13 +3,15 @@
using Build5Nines.SharpVector.VectorStore;
using System.Collections.Concurrent;
using OpenAI.Embeddings;
using System.IO.Compression;
using System.Text.Json;

namespace Build5Nines.SharpVector.OpenAI;

public abstract class OpenAIMemoryVectorDatabaseBase<TId, TMetadata, TVectorStore, TIdGenerator, TVectorComparer>
: IVectorDatabase<TId, TMetadata>
where TId : notnull
where TVectorStore : IVectorStore<TId, TMetadata>
where TVectorStore : IVectorStore<TId, TMetadata, string>
where TIdGenerator : IIdGenerator<TId>, new()
where TVectorComparer : IVectorComparer, new()
{
Expand Down Expand Up @@ -76,7 +78,7 @@ public IEnumerable<TId> GetIds()
/// <param name="id">The identifier of the item to retrieve from the vector store.</param>
/// <returns>The item that <c>VectorStore.Get</c> returns for <paramref name="id"/>.</returns>
/// <exception cref="KeyNotFoundException">Presumably thrown by the vector store when no item has the given id; confirm against the store implementation.</exception>
public IVectorTextItem<TMetadata> GetText(TId id)
public IVectorTextItem<string, TMetadata> GetText(TId id)
{
return VectorStore.Get(id);
}
Expand All @@ -86,7 +88,7 @@ public IVectorTextItem<TMetadata> GetText(TId id)
/// </summary>
/// <param name="id">The identifier of the item to remove from the vector store.</param>
/// <returns>The item that <c>VectorStore.Delete</c> returns for <paramref name="id"/> (presumably the removed item; confirm against the store implementation).</returns>
/// <exception cref="KeyNotFoundException">Presumably thrown by the vector store when no item has the given id; confirm against the store implementation.</exception>
public IVectorTextItem<TMetadata> DeleteText(TId id)
public IVectorTextItem<string, TMetadata> DeleteText(TId id)
{
return VectorStore.Delete(id);
}
Expand Down Expand Up @@ -193,7 +195,7 @@ public void UpdateTextAndMetadata(TId id, string text, TMetadata metadata)
/// <param name="threshold">The similarity threshold. Only return items greater or equal to the threshold. Null returns all.</param>
/// <param name="pageIndex">The page index of the search results. Default is 0.</param>
/// <param name="pageCount">The number of search results per page. Default is Null and returns all results.</param>
/// <returns>The result produced by <see cref="SearchAsync"/> for the same arguments.</returns>
/// <remarks>
/// Synchronous wrapper: blocks the calling thread on <c>Task.Result</c> until <see cref="SearchAsync"/> completes,
/// so an exception thrown by the asynchronous search surfaces wrapped in an <see cref="AggregateException"/>.
/// Prefer <see cref="SearchAsync"/> when calling from asynchronous code.
/// </remarks>
public IVectorTextResult<TMetadata> Search(string queryText, float? threshold = null, int pageIndex = 0, int? pageCount = null)
public IVectorTextResult<string, TMetadata> Search(string queryText, float? threshold = null, int pageIndex = 0, int? pageCount = null)
{
return SearchAsync(queryText, threshold, pageIndex, pageCount).Result;
}
Expand All @@ -206,7 +208,7 @@ public IVectorTextResult<TMetadata> Search(string queryText, float? threshold =
/// <param name="pageIndex">The page index of the search results. Default is 0.</param>
/// <param name="pageCount">The number of search results per page. Default is Null and returns all results.</param>
/// <returns></returns>
public async Task<IVectorTextResult<TMetadata>> SearchAsync(string queryText, float? threshold = null, int pageIndex = 0, int? pageCount = null)
public async Task<IVectorTextResult<string, TMetadata>> SearchAsync(string queryText, float? threshold = null, int pageIndex = 0, int? pageCount = null)
{
var similarities = await CalculateVectorComparisonAsync(queryText, threshold);

Expand Down Expand Up @@ -260,10 +262,7 @@ public virtual async Task SerializeToJsonStreamAsync(Stream stream)
var streamVectorStore = new MemoryStream();
var streamVocabularyStore = new MemoryStream();

var taskVectorStore = VectorStore.SerializeToJsonStreamAsync(streamVectorStore);
var taskVocabularyStore = VectorStore.VocabularyStore.SerializeToJsonStreamAsync(streamVocabularyStore);

await Task.WhenAll(taskVectorStore, taskVocabularyStore);
await VectorStore.SerializeToJsonStreamAsync(streamVectorStore);

using (var archive = new ZipArchive(stream, ZipArchiveMode.Create, true))
{
Expand Down Expand Up @@ -330,19 +329,22 @@ public virtual async Task DeserializeFromJsonStreamAsync(Stream stream)

var databaseInfo = JsonSerializer.Deserialize<DatabaseInfo>(databaseInfoJson);

string SupportedVersion = "1.0.0";
string SupportedSchema = "Build5Nines.SharpVector";

if (databaseInfo == null)
{
throw new DatabaseFileInfoException("Database info entry is null.");
}

if (databaseInfo.Schema != DatabaseInfo.SupportedSchema)
if (databaseInfo.Schema != SupportedSchema)
{
throw new DatabaseFileSchemaException($"The database schema does not match the expected schema (Expected: {DatabaseInfo.SupportedSchema} - Actual: {databaseInfo.Schema}).");
throw new DatabaseFileSchemaException($"The database schema does not match the expected schema (Expected: {SupportedSchema} - Actual: {databaseInfo.Schema}).");
}

if (databaseInfo.Version != DatabaseInfo.SupportedVersion)
if (databaseInfo.Version != SupportedVersion)
{
throw new DatabaseFileVersionException($"The database version does not match the expected version (Expected: {DatabaseInfo.SupportedVersion} - Actual: {databaseInfo.Version}).");
throw new DatabaseFileVersionException($"The database version does not match the expected version (Expected: {SupportedVersion} - Actual: {databaseInfo.Version}).");
}

if (databaseInfo.ClassType != this.GetType().FullName)
Expand Down
5 changes: 1 addition & 4 deletions src/OpenAIConsoleTest/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -77,15 +77,12 @@ await Parallel.ForEachAsync(movies.EnumerateArray(), async (movie, cancellationT
Console.WriteLine(string.Empty);

if (newPrompt != null) {
IVectorTextResult<string> result;

var timer = new Stopwatch();
timer.Start();


var pageSize = 3;
// result = await vdb.Search(newPrompt,
result = await vdb.SearchAsync(newPrompt,
var result = await vdb.SearchAsync(newPrompt,
threshold: 0.001f, // 0.2f, // Cosine Similarity - Only return results with similarity greater than this threshold
// threshold: (float)1.4f, // Euclidean Distance - Only return results with distance less than this threshold

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,5 +29,14 @@ public void TestInitialization()
Assert.IsNotNull(_database);
}

[TestMethod]
public async Task Test_SaveLoad_01()
{
var filename = "openai_test_saveload_01.b59vdb";
await _database.SaveToFileAsync(filename);

Check warning on line 36 in src/SharpVectorOpenAITest/BasicOpenAIMemoryVectorDatabaseTest.cs

View workflow job for this annotation

GitHub Actions / build

Possible null reference argument for parameter 'vectorDatabase' in 'Task IVectorDatabaseExtensions.SaveToFileAsync<int, string, string>(IVectorDatabase<int, string, string> vectorDatabase, string filePath)'.

Check warning on line 36 in src/SharpVectorOpenAITest/BasicOpenAIMemoryVectorDatabaseTest.cs

View workflow job for this annotation

GitHub Actions / build

Possible null reference argument for parameter 'vectorDatabase' in 'Task IVectorDatabaseExtensions.SaveToFileAsync<int, string, string>(IVectorDatabase<int, string, string> vectorDatabase, string filePath)'.

Check warning on line 36 in src/SharpVectorOpenAITest/BasicOpenAIMemoryVectorDatabaseTest.cs

View workflow job for this annotation

GitHub Actions / build

Possible null reference argument for parameter 'vectorDatabase' in 'Task IVectorDatabaseExtensions.SaveToFileAsync<int, string, string>(IVectorDatabase<int, string, string> vectorDatabase, string filePath)'.

Check warning on line 36 in src/SharpVectorOpenAITest/BasicOpenAIMemoryVectorDatabaseTest.cs

View workflow job for this annotation

GitHub Actions / build

Possible null reference argument for parameter 'vectorDatabase' in 'Task IVectorDatabaseExtensions.SaveToFileAsync<int, string, string>(IVectorDatabase<int, string, string> vectorDatabase, string filePath)'.

Check warning on line 36 in src/SharpVectorOpenAITest/BasicOpenAIMemoryVectorDatabaseTest.cs

View workflow job for this annotation

GitHub Actions / build

Possible null reference argument for parameter 'vectorDatabase' in 'Task IVectorDatabaseExtensions.SaveToFileAsync<int, string, string>(IVectorDatabase<int, string, string> vectorDatabase, string filePath)'.

Check warning on line 36 in src/SharpVectorOpenAITest/BasicOpenAIMemoryVectorDatabaseTest.cs

View workflow job for this annotation

GitHub Actions / build

Possible null reference argument for parameter 'vectorDatabase' in 'Task IVectorDatabaseExtensions.SaveToFileAsync<int, string, string>(IVectorDatabase<int, string, string> vectorDatabase, string filePath)'.

Check warning on line 36 in src/SharpVectorOpenAITest/BasicOpenAIMemoryVectorDatabaseTest.cs

View workflow job for this annotation

GitHub Actions / build

Possible null reference argument for parameter 'vectorDatabase' in 'Task IVectorDatabaseExtensions.SaveToFileAsync<int, string, string>(IVectorDatabase<int, string, string> vectorDatabase, string filePath)'.

Check warning on line 36 in src/SharpVectorOpenAITest/BasicOpenAIMemoryVectorDatabaseTest.cs

View workflow job for this annotation

GitHub Actions / build

Possible null reference argument for parameter 'vectorDatabase' in 'Task IVectorDatabaseExtensions.SaveToFileAsync<int, string, string>(IVectorDatabase<int, string, string> vectorDatabase, string filePath)'.

await _database.LoadFromFileAsync(filename);
}

}
}

0 comments on commit dbc8776

Please sign in to comment.