app.py
#!/usr/bin/env python3
"""An app to find (the first four) images matching a text string, and display them.
"""

import logging
import os

from pathlib import Path
from typing import Annotated

import clip
import psycopg
import torch

from dotenv import load_dotenv
from fastapi import FastAPI, Request, Form
from fastapi.responses import HTMLResponse
from fastapi.staticfiles import StaticFiles
from fastapi.templating import Jinja2Templates

app = FastAPI()

templates = Jinja2Templates(directory="templates")

# Our images are kept locally, so make them available to the `img` tag
app.mount("/photos", StaticFiles(directory="photos"), name="photos")
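
# The directory layout assumed next to this file, inferred from the settings
# above and the model path below (an assumption about the repo, not taken from
# its documentation):
#
#     app.py
#     templates/index.html
#     templates/images.html
#     photos/              <- the image files served to the browser
#     models/ViT-B-32.pt   <- optional local copy of the CLIP model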
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s %(name)s %(levelname)s: %(message)s',
)
logger = logging.getLogger(__name__)

load_dotenv()
SERVICE_URI = os.getenv("PG_SERVICE_URI")

# Load the open CLIP model
# If we download it remotely, it will default to being cached in ~/.cache/clip
LOCAL_MODEL = Path('./models/ViT-B-32.pt').absolute()
MODEL_NAME = 'ViT-B/32'
DEVICE = "cuda" if torch.cuda.is_available() else "cpu"

if LOCAL_MODEL.exists():
    logger.info(f'Importing CLIP model {MODEL_NAME} from {LOCAL_MODEL.parent}')
    logger.info(f'Using {DEVICE}')
    model, preprocess = clip.load(MODEL_NAME, device=DEVICE, download_root=LOCAL_MODEL.parent)
else:
    logger.info(f'Importing CLIP model {MODEL_NAME}')
    logger.info(f'Using {DEVICE}')
    model, preprocess = clip.load(MODEL_NAME, device=DEVICE)
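
# A minimal sketch of pre-fetching the model so that the local path above is
# used on later runs (an assumption about the workflow, not a documented step;
# it needs network access the first time):
#
#     import clip
#     clip.load('ViT-B/32', download_root='./models')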


def get_single_embedding(text):
    with torch.no_grad():
        # Encode the text to compute the feature vector and normalize it
        text_input = clip.tokenize([text]).to(DEVICE)
        text_features = model.encode_text(text_input)
        text_features /= text_features.norm(dim=-1, keepdim=True)

    # Return the feature vector
    return text_features.cpu().numpy()[0]


def vector_to_string(embedding):
    """Convert our (ndarray) embedding vector into a string that SQL can use.
    """
    vector_str = ", ".join(str(x) for x in embedding.tolist())
    vector_str = f'[{vector_str}]'
    return vector_str
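
# Illustrative use of vector_to_string (not part of the app flow):
#
#     >>> import numpy as np
#     >>> vector_to_string(np.array([0.25, -0.5]))
#     '[0.25, -0.5]'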


def search_for_matches(text):
    logger.info(f'Searching for {text!r}')
    vector = get_single_embedding(text)
    embedding_string = vector_to_string(vector)

    # Perform the search: `<->` is pgvector's Euclidean distance operator, so
    # this orders the pictures by how close their embedding is to the query
    try:
        with psycopg.connect(SERVICE_URI) as conn:
            with conn.cursor() as cur:
                cur.execute(
                    "SELECT * FROM pictures ORDER BY embedding <-> %s LIMIT 4;",
                    (embedding_string,),
                )
                rows = cur.fetchall()
                return [row[0] for row in rows]
    except Exception as exc:
        logger.error(f'{exc.__class__.__name__}: {exc}')
        return []
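
# The query above assumes a pgvector-enabled table along these lines, with the
# image filename in the first column. This is a guess at the schema, not taken
# from this repository (ViT-B/32 embeddings have 512 dimensions):
#
#     CREATE EXTENSION IF NOT EXISTS vector;
#     CREATE TABLE pictures (filename text PRIMARY KEY, embedding vector(512));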


@app.get("/", response_class=HTMLResponse)
async def index(request: Request):
    return templates.TemplateResponse(
        request=request,
        name="index.html",
        context={
            "search_hint": "Find images like...",
        },
    )
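
# The POST handler below expects a form field named `search_text`, so
# templates/index.html presumably contains something along these lines
# (a sketch, not the actual template):
#
#     <form action="/search_form" method="post">
#       <input name="search_text" placeholder="{{ search_hint }}">
#       <button type="submit">Search</button>
#     </form>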


@app.post("/search_form", response_class=HTMLResponse)
async def search_form(request: Request, search_text: Annotated[str, Form()]):
    logger.info(f'Search form requested {search_text!r}')
    images = search_for_matches(search_text)
    return templates.TemplateResponse(
        request=request,
        name="images.html",
        context={
            "images": images,
        },
    )
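
# To run the app locally, something like the following should work (assumes
# uvicorn is installed; it is not imported by this file):
#
#     uvicorn app:app --reload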