Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Better screenshot with vision #47

Merged
merged 4 commits into from
Nov 26, 2023
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
5 changes: 5 additions & 0 deletions .changeset/warm-snails-applaud.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
---
"syncia": minor
---

Stable Screenshot tool with GPT Vision
2 changes: 1 addition & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ Syncia is a Chrome extension that enables users to chat with ChatGPT by opening
- 💯 Fully customizable settings page.
- 🎨 Has both light mode and dark mode!
- 📚 Answer questions in context with text from webpage.
- 👁️ Screenshot any region and query GPT 4 Vision [Beta]
- 👁️ Screenshot any region and query GPT 4 Vision
- 🔐 Secure and fully Free and open source forever. (Please consider giving this project a star 🌟 and contributing 💖 to support the project.)

## 🐳 Installation
Expand Down
Binary file modified artifacts/chrome.zip
Binary file not shown.
8 changes: 7 additions & 1 deletion manifest.config.ts
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,13 @@ export default defineManifest(async (env) => ({
'48': 'images/icon-48.png',
'128': 'images/icon-128.png',
},
permissions: ['storage', 'unlimitedStorage', 'contextMenus', 'tabs'],
permissions: [
'storage',
'unlimitedStorage',
'contextMenus',
'tabs',
'activeTab',
],
background: {
service_worker: 'src/pages/background/index.ts',
},
Expand Down
1 change: 0 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,6 @@
"axios": "^1.3.5",
"dnd-kit-sortable-tree": "^0.1.58",
"endent": "^2.1.0",
"html2canvas": "^1.4.1",
"jotai": "^2.4.3",
"langchain": "^0.0.197-rc.1",
"object-hash": "^3.0.0",
Expand Down
1 change: 0 additions & 1 deletion src/components/Sidebar/chat/ImageCaptureButton.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,6 @@ const ImageCaptureButton = ({
}
return (
<button
title="BETA: Take a screenshot and send it to the chat. It will not work on some websites."
onClick={handleScreenshotClick}
type="button"
className="cdx-bg-neutral-300 cdx-text-neutral-500 dark:cdx-text-neutral-200 dark:cdx-bg-neutral-800 cdx-p-2 cdx-rounded"
Expand Down
106 changes: 40 additions & 66 deletions src/lib/getScreenshotImage.ts
Original file line number Diff line number Diff line change
@@ -1,15 +1,10 @@
import html2canvas from 'html2canvas'

/**
* We use this function to
* 1. Create a snipping tool view for the user to select the area of the screen
* 2. Grab the screen image with canvas
* 3. Crop the image with the user's selection
* 4. Return the cropped image as a blob
*
* TODO: This approach is not ideal as the website visible to user may not be the same as the one
* captured by html2canvas. For example, if the user has adblock installed, the website may look
* different to the one captured by html2canvas. We should consider another approach to capture
* 2. Grab the coordinates of the user's selection
* 3. Take a screenshot of the screen
* 4. Crop the screenshot to the user's selection
* 5. Return the cropped image as a blob
*/
export const getScreenshotImage = async (): Promise<Blob> => {
// Create a snipping tool view for the user to select the area of the screen
Expand All @@ -32,15 +27,6 @@ export const getScreenshotImage = async (): Promise<Blob> => {
document.body.appendChild(snipeRegion)
document.body.appendChild(snipeSelection)

// Create a canvas element
const canvas: HTMLCanvasElement = document.createElement('canvas')
canvas.width = window.innerWidth
canvas.height = window.innerHeight
const ctx: CanvasRenderingContext2D | null = canvas.getContext('2d')
if (!ctx) {
throw new Error('Could not get canvas context')
}

// Initially declare the variables with a type and set to undefined
let startX: number | undefined
let startY: number | undefined
Expand All @@ -67,8 +53,6 @@ export const getScreenshotImage = async (): Promise<Blob> => {
endY = e.clientY
document.removeEventListener('mousemove', onMouseMove)
snipeRegion.removeEventListener('mouseup', onMouseUp)
document.body.removeChild(snipeRegion)
document.body.removeChild(snipeSelection)
resolve()
}

Expand All @@ -87,6 +71,11 @@ export const getScreenshotImage = async (): Promise<Blob> => {
)
})

// Remove the snipping tool view
document.body.removeChild(snipeRegion)
document.body.removeChild(snipeSelection)
await new Promise((resolve) => setTimeout(resolve, 100)) // Wait for the DOM to update

// Ensure that the coordinates are defined before using them
if (
typeof startX === 'undefined' ||
Expand All @@ -97,59 +86,44 @@ export const getScreenshotImage = async (): Promise<Blob> => {
throw new Error('Selection coordinates have not been defined.')
}

// Now we can safely use the variables as they have been assigned during the mouse events
const width: number = Math.abs(endX - startX)
const height: number = Math.abs(endY - startY)
const left: number = Math.min(startX, endX)
const top: number = Math.min(startY, endY)

// Use html2canvas to capture the content of the page
const screenshotCanvas: HTMLCanvasElement = await html2canvas(document.body, {
width: window.innerWidth,
height: window.innerHeight,
x: window.scrollX,
y: window.scrollY,
scale: 1,
useCORS: true,
// Take a screenshot of the screen
const screenshot = await new Promise<string>((resolve) => {
chrome.runtime.sendMessage({ action: 'captureVisibleTab' }, (dataUrl) => {
console.log({ dataUrl })
resolve(dataUrl)
})
})

// Create a cropped canvas as before
const croppedCanvas: HTMLCanvasElement = document.createElement('canvas')
croppedCanvas.width = width
croppedCanvas.height = height
const croppedCtx: CanvasRenderingContext2D | null =
croppedCanvas.getContext('2d')
if (!croppedCtx) {
throw new Error('Could not get cropped canvas context')
}
// Create a canvas element and draw the screenshot on it
const canvas: HTMLCanvasElement = document.createElement('canvas')
canvas.width = endX - startX
canvas.height = endY - startY
const ctx: CanvasRenderingContext2D = canvas.getContext('2d')!
const image: HTMLImageElement = new Image()
image.src = screenshot

// Wait for the image to load
await new Promise((resolve) => {
image.onload = resolve
})

// Draw the captured area from the screenshotCanvas onto the cropped canvas
croppedCtx.drawImage(
screenshotCanvas,
left,
top,
width,
height,
// Crop the screenshot to the user's selection
ctx.drawImage(
image,
startX * window.devicePixelRatio,
startY * window.devicePixelRatio,
(endX - startX) * window.devicePixelRatio,
(endY - startY) * window.devicePixelRatio,
0,
0,
width,
height,
endX - startX,
endY - startY,
)

// Convert the cropped canvas to a blob as before
const blob: Blob | null = await new Promise((resolve, reject) => {
croppedCanvas.toBlob((blob) => {
if (blob) {
resolve(blob)
} else {
reject(new Error('Blob conversion failed'))
}
// Convert the canvas to a blob and return it
return new Promise((resolve) => {
canvas.toBlob((blob) => {
resolve(blob!)
})
})

if (!blob) {
throw new Error('Blob is null')
}

return blob
}
2 changes: 2 additions & 0 deletions src/pages/background/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ import {
createContextMenuOnStorageChange,
} from './quick-menu/createContextMenu'
import { forwardContextMenuClicks } from './quick-menu/forwardContextMenu'
import { captureScreenListener } from './sidebar/captureScreenListener'
import { sendSidebarShortcut } from './sidebar/sendSidebarShortcut'
import { sidebarToggleListeners } from './sidebar/sidebarToggleListeners'

Expand All @@ -14,6 +15,7 @@ backgroundLog()
// =========================== //
sidebarToggleListeners()
sendSidebarShortcut()
captureScreenListener()

// =========================== //
// Quick menu Scripts
Expand Down
15 changes: 15 additions & 0 deletions src/pages/background/sidebar/captureScreenListener.ts
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
/**
 * Registers a background message listener that captures the visible tab.
 *
 * The sidebar sends `{ action: 'captureVisibleTab' }` after the user has
 * finished selecting a region; this listener replies with the data URL
 * produced by `chrome.tabs.captureVisibleTab`.
 *
 * If the capture fails, the failure is logged and the reply is still sent
 * (with whatever value the API provided, normally `undefined`) so the sender
 * is never left waiting on a response that will not arrive.
 *
 * Messages with any other action are ignored so other listeners can handle
 * them.
 */
export const captureScreenListener = async () => {
  chrome.runtime.onMessage.addListener((request, _sender, sendResponse) => {
    // Guard against null/primitive messages as well as unrelated actions.
    if (request?.action !== 'captureVisibleTab') {
      return undefined
    }

    chrome.tabs.captureVisibleTab((dataUrl) => {
      // `lastError` must be read inside the callback; otherwise a failed
      // capture goes unnoticed and Chrome reports an unchecked runtime error.
      const captureError = chrome.runtime.lastError
      if (captureError) {
        console.warn(
          'captureVisibleTab failed:',
          captureError.message ?? captureError,
        )
      }
      sendResponse(dataUrl)
    })

    // Keep the message channel open until `sendResponse` is called.
    return true
  })
}
4 changes: 3 additions & 1 deletion src/pages/background/sidebar/sidebarToggleListeners.ts
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,9 @@ export const sidebarToggleListeners = () => {
if (message.action === 'generate') {
message.prompt
}
sendResponse({ action: 'close-sidebar' })
if (message.action === 'close-sidebar') {
sendResponse({ action: 'close-sidebar' })
}
})
}

Expand Down
34 changes: 0 additions & 34 deletions yarn.lock
Original file line number Diff line number Diff line change
Expand Up @@ -1915,11 +1915,6 @@ base-64@^0.1.0:
resolved "https://registry.yarnpkg.com/base-64/-/base-64-0.1.0.tgz#780a99c84e7d600260361511c4877613bf24f6bb"
integrity sha512-Y5gU45svrR5tI2Vt/X9GPd3L0HNIKzGu202EjxrXMpuc2V2CiKgemAbUUsqYmZJvPtCXoUKjNZwBJzsNScUbXA==

base64-arraybuffer@^1.0.2:
version "1.0.2"
resolved "https://registry.yarnpkg.com/base64-arraybuffer/-/base64-arraybuffer-1.0.2.tgz#1c37589a7c4b0746e34bd1feb951da2df01c1bdc"
integrity sha512-I3yl4r9QB5ZRY3XuJVEPfc2XhZO6YweFPI+UovAzn+8/hb3oJ6lnysaFcjVpkCPfVWFUDvoZ8kmVDP7WyRtYtQ==

base64-js@^1.3.1, base64-js@^1.5.1:
version "1.5.1"
resolved "https://registry.yarnpkg.com/base64-js/-/base64-js-1.5.1.tgz#1b1b440160a5bf7ad40b650f095963481903930a"
Expand Down Expand Up @@ -2348,13 +2343,6 @@ [email protected]:
resolved "https://registry.yarnpkg.com/crypt/-/crypt-0.0.2.tgz#88d7ff7ec0dfb86f713dc87bbb42d044d3e6c41b"
integrity sha512-mCxBlsHFYh9C+HVpiEacem8FEBnMXgU9gy4zmNC+SXAZNB/1idgp/aulFJ4FgCi7GPEVbfyng092GqL2k2rmow==

css-line-break@^2.1.0:
version "2.1.0"
resolved "https://registry.yarnpkg.com/css-line-break/-/css-line-break-2.1.0.tgz#bfef660dfa6f5397ea54116bb3cb4873edbc4fa0"
integrity sha512-FHcKFCZcAha3LwfVBhCQbW2nCNbkZXn7KVUJcsT5/P8YmfsVja0FMPJr0B903j/E69HUphKiV9iQArX8SDYA4w==
dependencies:
utrie "^1.0.2"

css-select@^5.1.0:
version "5.1.0"
resolved "https://registry.yarnpkg.com/css-select/-/css-select-5.1.0.tgz#b8ebd6554c3637ccc76688804ad3f6a6fdaea8a6"
Expand Down Expand Up @@ -3352,14 +3340,6 @@ html-void-elements@^2.0.0:
resolved "https://registry.yarnpkg.com/html-void-elements/-/html-void-elements-2.0.1.tgz#29459b8b05c200b6c5ee98743c41b979d577549f"
integrity sha512-0quDb7s97CfemeJAnW9wC0hw78MtW7NU3hqtCD75g2vFlDLt36llsYD7uB7SUzojLMP24N5IatXf7ylGXiGG9A==

html2canvas@^1.4.1:
version "1.4.1"
resolved "https://registry.yarnpkg.com/html2canvas/-/html2canvas-1.4.1.tgz#7cef1888311b5011d507794a066041b14669a543"
integrity sha512-fPU6BHNpsyIhr8yyMpTLLxAbkaK8ArIBcmZIRiBLiDhjeqvXolaEmDGmELFuX9I4xDcaKKcJl+TKZLqruBbmWA==
dependencies:
css-line-break "^2.1.0"
text-segmentation "^1.0.3"

htmlparser2@^8.0.1:
version "8.0.2"
resolved "https://registry.yarnpkg.com/htmlparser2/-/htmlparser2-8.0.2.tgz#f002151705b383e62433b5cf466f5b716edaec21"
Expand Down Expand Up @@ -6004,13 +5984,6 @@ term-size@^2.1.0:
resolved "https://registry.yarnpkg.com/term-size/-/term-size-2.2.1.tgz#2a6a54840432c2fb6320fea0f415531e90189f54"
integrity sha512-wK0Ri4fOGjv/XPy8SBHZChl8CM7uMc5VML7SqiQ0zG7+J5Vr+RMQDoHa2CNT6KHUnTGIXH34UDMkPzAUyapBZg==

text-segmentation@^1.0.3:
version "1.0.3"
resolved "https://registry.yarnpkg.com/text-segmentation/-/text-segmentation-1.0.3.tgz#52a388159efffe746b24a63ba311b6ac9f2d7943"
integrity sha512-iOiPUo/BGnZ6+54OsWxZidGCsdU8YbE4PSpdPinp7DeMtUJNJBoJ/ouUSTJjHkh1KntHaltHl/gDs2FC4i5+Nw==
dependencies:
utrie "^1.0.2"

thenify-all@^1.0.0:
version "1.6.0"
resolved "https://registry.yarnpkg.com/thenify-all/-/thenify-all-1.6.0.tgz#1a1918d402d8fc3f98fbf234db0bcc8cc10e9726"
Expand Down Expand Up @@ -6252,13 +6225,6 @@ util-deprecate@^1.0.1, util-deprecate@^1.0.2, util-deprecate@~1.0.1:
resolved "https://registry.yarnpkg.com/util-deprecate/-/util-deprecate-1.0.2.tgz#450d4dc9fa70de732762fbd2d4a28981419a0ccf"
integrity sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw==

utrie@^1.0.2:
version "1.0.2"
resolved "https://registry.yarnpkg.com/utrie/-/utrie-1.0.2.tgz#d42fe44de9bc0119c25de7f564a6ed1b2c87a645"
integrity sha512-1MLa5ouZiOmQzUbjbu9VmjLzn1QLXBhwpUa7kdLUQK+KQ5KA9I1vk5U4YHe/X2Ch7PYnJfWuWT+VbuxbGwljhw==
dependencies:
base64-arraybuffer "^1.0.2"

uuid@^9.0.0:
version "9.0.1"
resolved "https://registry.yarnpkg.com/uuid/-/uuid-9.0.1.tgz#e188d4c8853cc722220392c424cd637f32293f30"
Expand Down