Skip to content

Commit edb9f06

Browse files
committed
fix: allow Unicode object names
This allows all Unicode characters that are also valid in XML 1.0 documents. See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html See: https://www.w3.org/TR/REC-xml/#charsets
1 parent 6f58fe2 commit edb9f06

File tree

11 files changed

+428
-9
lines changed

11 files changed

+428
-9
lines changed
Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,4 @@
1+
ALTER TABLE "storage"."objects"
2+
ADD CONSTRAINT objects_name_check
3+
CHECK (name SIMILAR TO '[\x09\x0A\x0D\x20-\xD7FF\xE000-\xFFFD\x00010000-\x0010ffff]+');
4+

src/http/plugins/xml.ts

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,6 +21,10 @@ export const xmlParser = fastifyPlugin(
2121
isArray: (_: string, jpath: string) => {
2222
return opts.parseAsArray?.includes(jpath)
2323
},
24+
tagValueProcessor: (name: string, value: string) =>
25+
value.replace(/&#x([0-9a-fA-F]{1,6});/g, (_, str: string) =>
26+
String.fromCharCode(Number.parseInt(str, 16))
27+
),
2428
})
2529
}
2630

src/internal/database/migrations/types.ts

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,4 +24,5 @@ export const DBMigration = {
2424
'optimize-search-function': 22,
2525
'operation-function': 23,
2626
'custom-metadata': 24,
27+
'unicode-object-names': 25,
2728
} as const

src/internal/errors/codes.ts

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -265,7 +265,7 @@ export const ERRORS = {
265265
code: ErrorCode.InvalidKey,
266266
resource: key,
267267
httpStatusCode: 400,
268-
message: `Invalid key: ${key}`,
268+
message: `Invalid key: ${encodeURIComponent(key)}`,
269269
originalError: e,
270270
}),
271271

src/storage/database/knex.ts

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -877,7 +877,11 @@ export class DBError extends StorageBackendError implements RenderableError {
877877
code: pgError.code,
878878
})
879879
default:
880-
return ERRORS.DatabaseError(pgError.message, pgError).withMetadata({
880+
const errorMessage =
881+
pgError.code === '23514' && pgError.constraint === 'objects_name_check'
882+
? 'Invalid object name'
883+
: pgError.message
884+
return ERRORS.DatabaseError(errorMessage, pgError).withMetadata({
881885
query,
882886
code: pgError.code,
883887
})

src/storage/limits.ts

Lines changed: 9 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -47,9 +47,15 @@ export async function isImageTransformationEnabled(tenantId: string) {
4747
* @param key
4848
*/
4949
export function isValidKey(key: string): boolean {
50-
// only allow s3 safe characters and characters which require special handling for now
51-
// https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
52-
return key.length > 0 && /^(\w|\/|!|-|\.|\*|'|\(|\)| |&|\$|@|=|;|:|\+|,|\?)*$/.test(key)
50+
// Allow any sequence of Unicode characters with UTF-8 encoding,
51+
// except characters not allowed in XML 1.0.
52+
// See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
53+
// See: https://www.w3.org/TR/REC-xml/#charsets
54+
//
55+
const regex =
56+
/[\0-\x08\x0B\f\x0E-\x1F\uFFFE\uFFFF]|[\uD800-\uDBFF](?![\uDC00-\uDFFF])|(?:[^\uD800-\uDBFF]|^)[\uDC00-\uDFFF]/
57+
58+
return key.length > 0 && !regex.test(key)
5359
}
5460

5561
/**

src/test/common.ts

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,42 @@ export const adminApp = app({})
88

99
const ENV = process.env
1010

11+
/**
12+
* Should support all Unicode characters with UTF-8 encoding according to AWS S3 object naming guide, including:
13+
* - Safe characters: 0-9 a-z A-Z !-_.*'()
14+
* - Characters that might require special handling: &$@=;/:+,? and Space and ASCII characters \t, \n, and \r.
15+
* - Characters: \{}^%`[]"<>~#| and non-printable ASCII characters (128–255 decimal characters).
16+
*
17+
* The following characters are not allowed:
18+
* - ASCII characters 0x00–0x1F, except 0x09, 0x0A, and 0x0D.
19+
* - Unicode \u{FFFE} and \u{FFFF}.
20+
* - Lone surrogates characters.
21+
* See: https://docs.aws.amazon.com/AmazonS3/latest/userguide/object-keys.html
22+
* See: https://www.w3.org/TR/REC-xml/#charsets
23+
*/
24+
export function getUnicodeObjectName(): string {
25+
const objectName = 'test'
26+
.concat("!-_*.'()")
27+
// Characters that might require special handling
28+
.concat('&$@=;:+,? \x09\x0A\x0D')
29+
// Characters to avoid
30+
.concat('\\{}^%`[]"<>~#|\xFF')
31+
// MinIO max. length for each '/' separated segment is 255
32+
.concat('/')
33+
.concat([...Array(127).keys()].map((i) => String.fromCodePoint(i + 128)).join(''))
34+
.concat('/')
35+
// Some special Unicode characters
36+
.concat('\u2028\u202F\u{0001FFFF}')
37+
// Some other Unicode characters
38+
.concat('일이삼\u{0001f642}')
39+
40+
return objectName
41+
}
42+
43+
export function getInvalidObjectName(): string {
44+
return 'test\x01\x02\x03.txt'
45+
}
46+
1147
export function useMockQueue() {
1248
const queueSpy: jest.SpyInstance | undefined = undefined
1349
beforeEach(() => {

src/test/object.test.ts

Lines changed: 89 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -6,10 +6,11 @@ import app from '../app'
66
import { getConfig, mergeConfig } from '../config'
77
import { signJWT } from '@internal/auth'
88
import { Obj, backends } from '../storage'
9-
import { useMockObject, useMockQueue } from './common'
9+
import { getInvalidObjectName, getUnicodeObjectName, useMockObject, useMockQueue } from './common'
1010
import { getServiceKeyUser, getPostgresConnection } from '@internal/database'
1111
import { Knex } from 'knex'
1212
import { ErrorCode, StorageBackendError } from '@internal/errors'
13+
import { randomUUID } from 'crypto'
1314

1415
const { jwtSecret, serviceKey, tenantId } = getConfig()
1516
const anonKey = process.env.ANON_KEY || ''
@@ -2456,3 +2457,90 @@ describe('testing list objects', () => {
24562457
expect(responseJSON[1].name).toBe('sadcat-upload23.png')
24572458
})
24582459
})
2460+
2461+
describe('Object key names with Unicode characters', () => {
2462+
test('can upload, get, list, and delete', async () => {
2463+
const prefix = `test-utf8-${randomUUID()}`
2464+
const objectName = getUnicodeObjectName()
2465+
2466+
const form = new FormData()
2467+
form.append('file', fs.createReadStream(`./src/test/assets/sadcat.jpg`))
2468+
const headers = Object.assign({}, form.getHeaders(), {
2469+
authorization: `Bearer ${serviceKey}`,
2470+
'x-upsert': 'true',
2471+
})
2472+
2473+
const uploadResponse = await app().inject({
2474+
method: 'POST',
2475+
url: `/object/bucket2/${prefix}/${encodeURIComponent(objectName)}`,
2476+
headers: {
2477+
...headers,
2478+
...form.getHeaders(),
2479+
},
2480+
payload: form,
2481+
})
2482+
expect(uploadResponse.statusCode).toBe(200)
2483+
2484+
const getResponse = await app().inject({
2485+
method: 'GET',
2486+
url: `/object/bucket2/${prefix}/${encodeURIComponent(objectName)}`,
2487+
headers: {
2488+
authorization: `Bearer ${serviceKey}`,
2489+
},
2490+
})
2491+
expect(getResponse.statusCode).toBe(200)
2492+
expect(getResponse.headers['etag']).toBe('abc')
2493+
expect(getResponse.headers['last-modified']).toBe('Thu, 12 Aug 2021 16:00:00 GMT')
2494+
expect(getResponse.headers['cache-control']).toBe('no-cache')
2495+
2496+
const listResponse = await app().inject({
2497+
method: 'POST',
2498+
url: `/object/list/bucket2`,
2499+
headers: {
2500+
authorization: `Bearer ${serviceKey}`,
2501+
},
2502+
payload: { prefix },
2503+
})
2504+
expect(listResponse.statusCode).toBe(200)
2505+
const listResponseJSON = JSON.parse(listResponse.body)
2506+
expect(listResponseJSON).toHaveLength(1)
2507+
expect(listResponseJSON[0].name).toBe(objectName.split('/')[0])
2508+
2509+
const deleteResponse = await app().inject({
2510+
method: 'DELETE',
2511+
url: `/object/bucket2/${prefix}/${encodeURIComponent(objectName)}`,
2512+
headers: {
2513+
authorization: `Bearer ${serviceKey}`,
2514+
},
2515+
})
2516+
expect(deleteResponse.statusCode).toBe(200)
2517+
})
2518+
2519+
test('should not upload if the name contains invalid characters', async () => {
2520+
const invalidObjectName = getInvalidObjectName()
2521+
const form = new FormData()
2522+
form.append('file', fs.createReadStream(`./src/test/assets/sadcat.jpg`))
2523+
const headers = Object.assign({}, form.getHeaders(), {
2524+
authorization: `Bearer ${serviceKey}`,
2525+
'x-upsert': 'true',
2526+
})
2527+
const uploadResponse = await app().inject({
2528+
method: 'POST',
2529+
url: `/object/bucket2/${encodeURIComponent(invalidObjectName)}`,
2530+
headers: {
2531+
...headers,
2532+
...form.getHeaders(),
2533+
},
2534+
payload: form,
2535+
})
2536+
expect(uploadResponse.statusCode).toBe(400)
2537+
expect(S3Backend.prototype.uploadObject).not.toHaveBeenCalled()
2538+
expect(uploadResponse.body).toBe(
2539+
JSON.stringify({
2540+
statusCode: '400',
2541+
error: 'InvalidKey',
2542+
message: `Invalid key: ${encodeURIComponent(invalidObjectName)}`,
2543+
})
2544+
)
2545+
})
2546+
})

src/test/s3-protocol.test.ts

Lines changed: 146 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -32,6 +32,7 @@ import { randomUUID } from 'crypto'
3232
import { getSignedUrl } from '@aws-sdk/s3-request-presigner'
3333
import axios from 'axios'
3434
import { createPresignedPost } from '@aws-sdk/s3-presigned-post'
35+
import { getInvalidObjectName, getUnicodeObjectName } from './common'
3536

3637
const { s3ProtocolAccessKeySecret, s3ProtocolAccessKeyId, storageS3Region } = getConfig()
3738

@@ -1402,5 +1403,150 @@ describe('S3 Protocol', () => {
14021403
expect(resp.ok).toBeTruthy()
14031404
})
14041405
})
1406+
1407+
describe('Object key names with Unicode characters', () => {
1408+
it('can be used with MultipartUpload commands', async () => {
1409+
const bucketName = await createBucket(client)
1410+
const objectName = getUnicodeObjectName()
1411+
const createMultiPartUpload = new CreateMultipartUploadCommand({
1412+
Bucket: bucketName,
1413+
Key: objectName,
1414+
ContentType: 'image/jpg',
1415+
CacheControl: 'max-age=2000',
1416+
})
1417+
const createMultipartResp = await client.send(createMultiPartUpload)
1418+
expect(createMultipartResp.UploadId).toBeTruthy()
1419+
const uploadId = createMultipartResp.UploadId
1420+
1421+
const listMultipartUploads = new ListMultipartUploadsCommand({
1422+
Bucket: bucketName,
1423+
})
1424+
const listMultipartResp = await client.send(listMultipartUploads)
1425+
expect(listMultipartResp.Uploads?.length).toBe(1)
1426+
expect(listMultipartResp.Uploads?.[0].Key).toBe(objectName)
1427+
1428+
const data = Buffer.alloc(1024 * 1024 * 2)
1429+
const uploadPart = new UploadPartCommand({
1430+
Bucket: bucketName,
1431+
Key: objectName,
1432+
ContentLength: data.length,
1433+
UploadId: uploadId,
1434+
Body: data,
1435+
PartNumber: 1,
1436+
})
1437+
const uploadPartResp = await client.send(uploadPart)
1438+
expect(uploadPartResp.ETag).toBeTruthy()
1439+
1440+
const listParts = new ListPartsCommand({
1441+
Bucket: bucketName,
1442+
Key: objectName,
1443+
UploadId: uploadId,
1444+
})
1445+
const listPartsResp = await client.send(listParts)
1446+
expect(listPartsResp.Parts?.length).toBe(1)
1447+
1448+
const completeMultiPartUpload = new CompleteMultipartUploadCommand({
1449+
Bucket: bucketName,
1450+
Key: objectName,
1451+
UploadId: uploadId,
1452+
MultipartUpload: {
1453+
Parts: [
1454+
{
1455+
PartNumber: 1,
1456+
ETag: uploadPartResp.ETag,
1457+
},
1458+
],
1459+
},
1460+
})
1461+
const completeMultipartResp = await client.send(completeMultiPartUpload)
1462+
expect(completeMultipartResp.$metadata.httpStatusCode).toBe(200)
1463+
expect(completeMultipartResp.Key).toEqual(objectName)
1464+
})
1465+
1466+
it('can be used with Put, List, and Delete Object commands', async () => {
1467+
const bucketName = await createBucket(client)
1468+
const objectName = getUnicodeObjectName()
1469+
const putObject = new PutObjectCommand({
1470+
Bucket: bucketName,
1471+
Key: objectName,
1472+
Body: Buffer.alloc(1024 * 1024 * 1),
1473+
})
1474+
const putObjectResp = await client.send(putObject)
1475+
expect(putObjectResp.$metadata.httpStatusCode).toEqual(200)
1476+
1477+
const listObjects = new ListObjectsCommand({
1478+
Bucket: bucketName,
1479+
})
1480+
const listObjectsResp = await client.send(listObjects)
1481+
expect(listObjectsResp.Contents?.length).toBe(1)
1482+
expect(listObjectsResp.Contents?.[0].Key).toBe(objectName)
1483+
1484+
const listObjectsV2 = new ListObjectsV2Command({
1485+
Bucket: bucketName,
1486+
})
1487+
const listObjectsV2Resp = await client.send(listObjectsV2)
1488+
expect(listObjectsV2Resp.Contents?.length).toBe(1)
1489+
expect(listObjectsV2Resp.Contents?.[0].Key).toBe(objectName)
1490+
1491+
const getObject = new GetObjectCommand({
1492+
Bucket: bucketName,
1493+
Key: objectName,
1494+
})
1495+
const getObjectResp = await client.send(getObject)
1496+
const getObjectRespData = await getObjectResp.Body?.transformToByteArray()
1497+
expect(getObjectRespData).toBeTruthy()
1498+
expect(getObjectResp.ETag).toBeTruthy()
1499+
1500+
const deleteObjects = new DeleteObjectsCommand({
1501+
Bucket: bucketName,
1502+
Delete: {
1503+
Objects: [
1504+
{
1505+
Key: objectName,
1506+
},
1507+
],
1508+
},
1509+
})
1510+
const deleteObjectsResp = await client.send(deleteObjects)
1511+
expect(deleteObjectsResp.Errors).toBeFalsy()
1512+
expect(deleteObjectsResp.Deleted).toEqual([
1513+
{
1514+
Key: objectName,
1515+
},
1516+
])
1517+
1518+
const getObjectDeleted = new GetObjectCommand({
1519+
Bucket: bucketName,
1520+
Key: objectName,
1521+
})
1522+
try {
1523+
await client.send(getObjectDeleted)
1524+
throw new Error('Should not reach here')
1525+
} catch (e) {
1526+
expect((e as S3ServiceException).$metadata.httpStatusCode).toEqual(404)
1527+
}
1528+
})
1529+
1530+
it('should not upload if the name contains invalid characters', async () => {
1531+
const bucketName = await createBucket(client)
1532+
const invalidObjectName = getInvalidObjectName()
1533+
try {
1534+
const putObject = new PutObjectCommand({
1535+
Bucket: bucketName,
1536+
Key: invalidObjectName,
1537+
Body: Buffer.alloc(1024 * 1024 * 1),
1538+
})
1539+
await client.send(putObject)
1540+
throw new Error('Should not reach here')
1541+
} catch (e) {
1542+
expect((e as Error).message).not.toBe('Should not reach here')
1543+
expect((e as S3ServiceException).$metadata.httpStatusCode).toEqual(400)
1544+
expect((e as S3ServiceException).name).toEqual('InvalidKey')
1545+
expect((e as S3ServiceException).message).toEqual(
1546+
`Invalid key: ${encodeURIComponent(invalidObjectName)}`
1547+
)
1548+
}
1549+
})
1550+
})
14051551
})
14061552
})

src/test/tenant.test.ts

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,7 +16,7 @@ const payload = {
1616
serviceKey: 'd',
1717
jwks: { keys: [] },
1818
migrationStatus: 'COMPLETED',
19-
migrationVersion: 'custom-metadata',
19+
migrationVersion: 'unicode-object-names',
2020
tracingMode: 'basic',
2121
features: {
2222
imageTransformation: {
@@ -40,7 +40,7 @@ const payload2 = {
4040
serviceKey: 'h',
4141
jwks: null,
4242
migrationStatus: 'COMPLETED',
43-
migrationVersion: 'custom-metadata',
43+
migrationVersion: 'unicode-object-names',
4444
tracingMode: 'basic',
4545
features: {
4646
imageTransformation: {

0 commit comments

Comments
 (0)