Skip to content

Commit

Permalink
More reductions to ease code review
Browse files Browse the repository at this point in the history
  • Loading branch information
odeke-em committed Feb 3, 2025
1 parent aa6a6c2 commit 130bc46
Show file tree
Hide file tree
Showing 3 changed files with 2 additions and 221 deletions.
1 change: 0 additions & 1 deletion src/langchain_google_spanner/vector_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -440,7 +440,6 @@ def _generate_sql(
- str: The generated SQL.
"""

# 1. If any of the columns is a VectorSearchIndex
embedding_config = list(
filter(lambda x: x.name == embedding_column, column_configs)
)
Expand Down
2 changes: 1 addition & 1 deletion tests/integration/test_spanner_loader.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import uuid

import pytest
from google.cloud.spanner import Client
from google.cloud.spanner import Client # type: ignore
from langchain_core.documents import Document

from langchain_google_spanner.loader import Column, SpannerDocumentSaver, SpannerLoader
Expand Down
220 changes: 1 addition & 219 deletions tests/integration/test_spanner_vector_store.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,16 +26,13 @@
QueryParameters,
SpannerVectorStore,
TableColumn,
VectorSearchIndex,
)

# Required Spanner coordinates, read from the environment. Indexing
# os.environ directly raises KeyError at import time if a variable is unset.
project_id = os.environ["PROJECT_ID"]
instance_id = os.environ["INSTANCE_ID"]
google_database = os.environ["GOOGLE_DATABASE"]
# NOTE(review): pg_database is bound twice in this view. This first, optional
# lookup (None when unset) is overwritten by the strict lookup two lines below,
# which raises KeyError when PG_DATABASE is missing.
pg_database = os.environ.get("PG_DATABASE", None)
# Location read from GOOGLE_DATABASE_ZONE, falling back to "us-west2".
zone = os.environ.get("GOOGLE_DATABASE_ZONE", "us-west2")
pg_database = os.environ["PG_DATABASE"]
# Per-run table name: "test_table" plus a uuid4 with dashes turned into
# underscores.
table_name = "test_table" + str(uuid.uuid4()).replace("-", "_")
# Fixed table name for the ANN tests.
table_name_ANN = "products"


# Timeout value passed to operation.result(...) when waiting on DDL operations.
OPERATION_TIMEOUT_SECONDS = 240
Expand Down Expand Up @@ -392,221 +389,6 @@ def test_spanner_vector_search_data4(self, setup_database):
assert len(docs) == 3


class TestSpannerVectorStoreGoogleSQL_ANN:
    """Integration test for the GoogleSQL vector store backed by a vector index.

    Relies on module-level configuration (``instance_id``, ``google_database``,
    ``project_id``, ``zone``, ``table_name_ANN``, ``OPERATION_TIMEOUT_SECONDS``)
    and on a ``client`` fixture defined elsewhere.
    """

    @pytest.fixture(scope="class")
    def setup_database(self, client):
        """Create the ANN table, yield seed data and helpers, then clean up.

        Yields:
            A 4-tuple ``(raw_data, columns, create_models, get_embeddings)``:
            the product rows, the column-name list, a callable that submits the
            model DDL / embedding UPDATE statements and waits for the result,
            and a zero-argument callable that runs an ``ML.PREDICT`` query and
            returns its rows as a list.
        """
        distance_strategy = DistanceStrategy.COSINE
        SpannerVectorStore.init_vector_store_table(
            instance_id=instance_id,
            database_id=google_database,
            table_name=table_name_ANN,
            id_column=TableColumn("productId", type="INT64"),
            # NOTE(review): the EmbeddingsModel endpoint below targets
            # text-embedding-004, which is presumably 768-dimensional; 758
            # looks like a typo -- confirm before relying on this table.
            vector_size=758,
            embedding_column=TableColumn(
                name="productDescriptionEmbedding",
                type="ARRAY<FLOAT32>",
                is_null=True,
            ),
            metadata_columns=[
                TableColumn(name="categoryId", type="INT64", is_null=False),
                TableColumn(name="productName", type="STRING(MAX)", is_null=False),
                TableColumn(
                    name="productDescription", type="STRING(MAX)", is_null=False
                ),
                TableColumn(name="inventoryCount", type="INT64", is_null=False),
                TableColumn(name="priceInCents", type="INT64", is_null=True),
            ],
            secondary_indexes=[
                VectorSearchIndex(
                    index_name="ProductDescriptionEmbeddingIndex",
                    columns=["productDescriptionEmbedding"],
                    nullable_column=True,
                    num_branches=1000,
                    tree_depth=3,
                    index_type=distance_strategy,
                    num_leaves=100000,
                ),
            ],
        )

        # Seed rows: six values per row.
        raw_data = [
            (
                1,
                1,
                "Cymbal Helios Helmet",
                "Safety meets style with the Cymbal children's bike helmet. Its lightweight design, superior ventilation, and adjustable fit ensure comfort and protection on every ride. Stay bright and keep your child safe under the sun with Cymbal Helios!",
                100,
                10999,
            ),
            (
                1,
                2,
                "Cymbal Sprout",
                "Let their cycling journey begin with the Cymbal Sprout, the ideal balance bike for beginning riders ages 2-4 years. Its lightweight frame, low seat height, and puncture-proof tires promote stability and confidence as little ones learn to balance and steer. Watch them sprout into cycling enthusiasts with Cymbal Sprout!",
                10,
                13999,
            ),
            (
                1,
                3,
                "Cymbal Spark Jr.",
                "Light, vibrant, and ready for adventure, the Spark Jr. is the perfect first bike for young riders (ages 5-8). Its sturdy frame, easy-to-use brakes, and puncture-resistant tires inspire confidence and endless playtime. Let the spark of cycling ignite with Cymbal!",
                34,
                13900,
            ),
            (
                1,
                4,
                "Cymbal Summit",
                "Conquering trails is a breeze with the Summit mountain bike. Its lightweight aluminum frame, responsive suspension, and powerful disc brakes provide exceptional control and comfort for experienced bikers navigating rocky climbs or shredding downhill. Reach new heights with Cymbal Summit!",
                0,
                79999,
            ),
            (
                1,
                5,
                "Cymbal Breeze",
                "Cruise in style and embrace effortless pedaling with the Breeze electric bike. Its whisper-quiet motor and long-lasting battery let you conquer hills and distances with ease. Enjoy scenic rides, commutes, or errands with a boost of confidence from Cymbal Breeze!",
                72,
                129999,
            ),
            (
                1,
                6,
                "Cymbal Trailblazer Backpack",
                "Carry all your essentials in style with the Trailblazer backpack. Its water-resistant material, multiple compartments, and comfortable straps keep your gear organized and accessible, allowing you to focus on the adventure. Blaze new trails with Cymbal Trailblazer!",
                24,
                7999,
            ),
            (
                1,
                7,
                "Cymbal Phoenix Lights",
                "See and be seen with the Phoenix bike lights. Powerful LEDs and multiple light modes ensure superior visibility, enhancing your safety and enjoyment during day or night rides. Light up your journey with Cymbal Phoenix!",
                87,
                3999,
            ),
            (
                1,
                8,
                "Cymbal Windstar Pump",
                "Flat tires are no match for the Windstar pump. Its compact design, lightweight construction, and high-pressure capacity make inflating tires quick and effortless. Get back on the road in no time with Cymbal Windstar!",
                36,
                24999,
            ),
            (
                1,
                9,
                "Cymbal Odyssey Multi-Tool",
                "Be prepared for anything with the Odyssey multi-tool. This handy gadget features essential tools like screwdrivers, hex wrenches, and tire levers, keeping you ready for minor repairs and adjustments on the go. Conquer your journey with Cymbal Odyssey!",
                52,
                999,
            ),
            (
                1,
                10,
                "Cymbal Nomad Water Bottle",
                "Stay hydrated on every ride with the Nomad water bottle. Its sleek design, BPA-free construction, and secure lock lid make it the perfect companion for staying refreshed and motivated throughout your adventures. Hydrate and explore with Cymbal Nomad!",
                42,
                1299,
            ),
        ]

        # NOTE(review): this list names seven columns (including "createTime")
        # while each raw_data row carries six values and the table created
        # above declares no "createTime" column -- reconcile before using
        # these two together for an insert.
        columns = [
            "categoryId",
            "productId",
            "productName",
            "productDescription",
            "createTime",
            "inventoryCount",
            "priceInCents",
        ]

        # The statement text below is kept flush-left so that the bytes sent
        # to Spanner match the original literals exactly.
        model_ddl_statements = [
            f"""
CREATE MODEL IF NOT EXISTS EmbeddingsModel INPUT(
content STRING(MAX),
) OUTPUT(
embeddings STRUCT<statistics STRUCT<truncated BOOL, token_count FLOAT32>, values ARRAY<FLOAT32>>,
) REMOTE OPTIONS (
endpoint = '//aiplatform.googleapis.com/projects/{project_id}/locations/{zone}/publishers/google/models/text-embedding-004'
)
""",
            f"""
CREATE MODEL IF NOT EXISTS LLMModel INPUT(
prompt STRING(MAX),
) OUTPUT(
content STRING(MAX),
) REMOTE OPTIONS (
endpoint = '//aiplatform.googleapis.com/projects/{project_id}/locations/{zone}/publishers/google/models/gemini-pro',
default_batch_size = 1
)
""",
            """
UPDATE products p1
SET productDescriptionEmbedding =
(
SELECT embeddings.values from ML.PREDICT(
MODEL EmbeddingsModel,
(SELECT productDescription as content FROM products p2 where p2.productId=p1.productId)
)
)
WHERE categoryId=1
""",
        ]
        database = client.instance(instance_id).database(google_database)

        def create_models():
            """Submit the model statements and block until the operation ends."""
            operation = database.update_ddl(model_ddl_statements)
            return operation.result(OPERATION_TIMEOUT_SECONDS)

        def get_embeddings():
            """Run ML.PREDICT for a fixed query sentence and return the rows.

            This is a closure handed out through the fixture, not a method, so
            it takes no ``self``: the test invokes it with no arguments, and a
            stray positional parameter would raise ``TypeError`` at call time.
            """
            sql = """SELECT embeddings.values FROM ML.PREDICT(
MODEL EmbeddingsModel,
(SELECT "I'd like to buy a starter bike for my 3 year old child" as content)
)"""

            with database.snapshot() as snapshot:
                res = snapshot.execute_sql(sql)
                return list(res)

        yield raw_data, columns, create_models, get_embeddings

        # Teardown. The fixture is class-scoped, so this runs once after the
        # tests in this class that use it have finished.
        print("\nPerforming GSQL cleanup after each ANN test...")

        # The cleanup DDL is submitted but deliberately not awaited: creating
        # a vector index takes 30+ minutes, so blocking on the operation is
        # avoided.
        # NOTE(review): the table is dropped before its index here; confirm
        # that Spanner accepts this order, otherwise move the index drop first.
        database.update_ddl(
            [
                f"DROP TABLE IF EXISTS {table_name_ANN}",
                "DROP MODEL IF EXISTS EmbeddingsModel",
                "DROP MODEL IF EXISTS LLMModel",
                "DROP Index IF EXISTS ProductDescriptionEmbeddingIndex",
            ]
        )

        print("\nGSQL Cleanup complete.")

    def test_ann_add_data1(self, setup_database):
        """Fetch embeddings via ML.PREDICT and construct a vector store."""
        raw_data, columns, create_models, get_embeddings = setup_database

        # Retrieve embeddings using ML_PREDICT.
        embeddings = get_embeddings()
        print("embeddings", embeddings)

        # NOTE(review): embedding_service receives the raw query result list
        # here rather than an embeddings object -- confirm this is intended.
        db = SpannerVectorStore(
            instance_id=instance_id,
            database_id=google_database,
            table_name=table_name_ANN,
            id_column="categoryId",
            ignore_metadata_columns=[],
            embedding_service=embeddings,
            metadata_json_column="metadata",
        )
        _ = db


class TestSpannerVectorStorePGSQL:
@pytest.fixture(scope="class")
def setup_database(self, client):
Expand Down

0 comments on commit 130bc46

Please sign in to comment.