Skip to content

Commit f82aeea

Browse files
authored
Simplify UTF8 StrPtr usage (pythonnet#2374)
* Use non-BOM encodings
* Copy potential BOM to the output of PyString_FromString. The documentation of the used `PyUnicode_DecodeUTF16` states that not passing `*byteorder`, or passing a 0, causes the first two bytes to be interpreted and skipped if they are the BOM (U+FEFF, zero-width no-break space). That is incorrect when converting a known "non-BOM" string, which all strings from C# are.
* Default to UTF8 for StrPtr
1 parent b112885 commit f82aeea

File tree

4 files changed

+27
-23
lines changed

4 files changed

+27
-23
lines changed

src/embed_tests/TestPyType.cs

+2-1
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,8 @@ public void CanCreateHeapType()
2828
const string name = "nÁmæ";
2929
const string docStr = "dÁcæ";
3030

31-
using var doc = new StrPtr(docStr, Encodings.UTF8);
31+
using var doc = new StrPtr(docStr);
32+
3233
var spec = new TypeSpec(
3334
name: name,
3435
basicSize: Util.ReadInt32(Runtime.Runtime.PyBaseObjectType, TypeOffset.tp_basicsize),

src/runtime/Native/NativeTypeSpec.cs

+1-1
Original file line number | Diff line number | Diff line change
@@ -17,7 +17,7 @@ public NativeTypeSpec(TypeSpec spec)
1717
{
1818
if (spec is null) throw new ArgumentNullException(nameof(spec));
1919

20-
this.Name = new StrPtr(spec.Name, Encodings.UTF8);
20+
this.Name = new StrPtr(spec.Name);
2121
this.BasicSize = spec.BasicSize;
2222
this.ItemSize = spec.ItemSize;
2323
this.Flags = (int)spec.Flags;

src/runtime/Native/StrPtr.cs

+2
Original file line number | Diff line number | Diff line change
@@ -10,6 +10,8 @@ struct StrPtr : IDisposable
1010
public IntPtr RawPointer { get; set; }
1111
unsafe byte* Bytes => (byte*)this.RawPointer;
1212

13+
public unsafe StrPtr(string value) : this(value, Encodings.UTF8) {}
14+
1315
public unsafe StrPtr(string value, Encoding encoding)
1416
{
1517
if (value is null) throw new ArgumentNullException(nameof(value));

src/runtime/Runtime.cs

+22-21
Original file line number | Diff line number | Diff line change
@@ -795,13 +795,13 @@ public static int Py_Main(int argc, string[] argv)
795795

796796
internal static int PyRun_SimpleString(string code)
797797
{
798-
using var codePtr = new StrPtr(code, Encodings.UTF8);
798+
using var codePtr = new StrPtr(code);
799799
return Delegates.PyRun_SimpleStringFlags(codePtr, Utf8String);
800800
}
801801

802802
internal static NewReference PyRun_String(string code, RunFlagType st, BorrowedReference globals, BorrowedReference locals)
803803
{
804-
using var codePtr = new StrPtr(code, Encodings.UTF8);
804+
using var codePtr = new StrPtr(code);
805805
return Delegates.PyRun_StringFlags(codePtr, st, globals, locals, Utf8String);
806806
}
807807

@@ -813,14 +813,15 @@ internal static NewReference PyRun_String(string code, RunFlagType st, BorrowedR
813813
/// </summary>
814814
internal static NewReference Py_CompileString(string str, string file, int start)
815815
{
816-
using var strPtr = new StrPtr(str, Encodings.UTF8);
816+
using var strPtr = new StrPtr(str);
817+
817818
using var fileObj = new PyString(file);
818819
return Delegates.Py_CompileStringObject(strPtr, fileObj, start, Utf8String, -1);
819820
}
820821

821822
internal static NewReference PyImport_ExecCodeModule(string name, BorrowedReference code)
822823
{
823-
using var namePtr = new StrPtr(name, Encodings.UTF8);
824+
using var namePtr = new StrPtr(name);
824825
return Delegates.PyImport_ExecCodeModule(namePtr, code);
825826
}
826827

@@ -867,13 +868,13 @@ internal static bool PyObject_IsIterable(BorrowedReference ob)
867868

868869
internal static int PyObject_HasAttrString(BorrowedReference pointer, string name)
869870
{
870-
using var namePtr = new StrPtr(name, Encodings.UTF8);
871+
using var namePtr = new StrPtr(name);
871872
return Delegates.PyObject_HasAttrString(pointer, namePtr);
872873
}
873874

874875
internal static NewReference PyObject_GetAttrString(BorrowedReference pointer, string name)
875876
{
876-
using var namePtr = new StrPtr(name, Encodings.UTF8);
877+
using var namePtr = new StrPtr(name);
877878
return Delegates.PyObject_GetAttrString(pointer, namePtr);
878879
}
879880

@@ -884,12 +885,12 @@ internal static NewReference PyObject_GetAttrString(BorrowedReference pointer, S
884885
internal static int PyObject_DelAttr(BorrowedReference @object, BorrowedReference name) => Delegates.PyObject_SetAttr(@object, name, null);
885886
internal static int PyObject_DelAttrString(BorrowedReference @object, string name)
886887
{
887-
using var namePtr = new StrPtr(name, Encodings.UTF8);
888+
using var namePtr = new StrPtr(name);
888889
return Delegates.PyObject_SetAttrString(@object, namePtr, null);
889890
}
890891
internal static int PyObject_SetAttrString(BorrowedReference @object, string name, BorrowedReference value)
891892
{
892-
using var namePtr = new StrPtr(name, Encodings.UTF8);
893+
using var namePtr = new StrPtr(name);
893894
return Delegates.PyObject_SetAttrString(@object, namePtr, value);
894895
}
895896

@@ -1071,7 +1072,7 @@ internal static bool PyBool_CheckExact(BorrowedReference ob)
10711072

10721073
internal static NewReference PyLong_FromString(string value, int radix)
10731074
{
1074-
using var valPtr = new StrPtr(value, Encodings.UTF8);
1075+
using var valPtr = new StrPtr(value);
10751076
return Delegates.PyLong_FromString(valPtr, IntPtr.Zero, radix);
10761077
}
10771078

@@ -1274,7 +1275,7 @@ internal static NewReference EmptyPyBytes()
12741275
internal static NewReference PyByteArray_FromStringAndSize(IntPtr strPtr, nint len) => Delegates.PyByteArray_FromStringAndSize(strPtr, len);
12751276
internal static NewReference PyByteArray_FromStringAndSize(string s)
12761277
{
1277-
using var ptr = new StrPtr(s, Encodings.UTF8);
1278+
using var ptr = new StrPtr(s);
12781279
return PyByteArray_FromStringAndSize(ptr.RawPointer, checked((nint)ptr.ByteCount));
12791280
}
12801281

@@ -1302,7 +1303,7 @@ internal static IntPtr PyBytes_AsString(BorrowedReference ob)
13021303

13031304
internal static NewReference PyUnicode_InternFromString(string s)
13041305
{
1305-
using var ptr = new StrPtr(s, Encodings.UTF8);
1306+
using var ptr = new StrPtr(s);
13061307
return Delegates.PyUnicode_InternFromString(ptr);
13071308
}
13081309

@@ -1377,7 +1378,7 @@ internal static bool PyDict_Check(BorrowedReference ob)
13771378

13781379
internal static BorrowedReference PyDict_GetItemString(BorrowedReference pointer, string key)
13791380
{
1380-
using var keyStr = new StrPtr(key, Encodings.UTF8);
1381+
using var keyStr = new StrPtr(key);
13811382
return Delegates.PyDict_GetItemString(pointer, keyStr);
13821383
}
13831384

@@ -1393,7 +1394,7 @@ internal static BorrowedReference PyDict_GetItemString(BorrowedReference pointer
13931394
/// </summary>
13941395
internal static int PyDict_SetItemString(BorrowedReference dict, string key, BorrowedReference value)
13951396
{
1396-
using var keyPtr = new StrPtr(key, Encodings.UTF8);
1397+
using var keyPtr = new StrPtr(key);
13971398
return Delegates.PyDict_SetItemString(dict, keyPtr, value);
13981399
}
13991400

@@ -1402,7 +1403,7 @@ internal static int PyDict_SetItemString(BorrowedReference dict, string key, Bor
14021403

14031404
internal static int PyDict_DelItemString(BorrowedReference pointer, string key)
14041405
{
1405-
using var keyPtr = new StrPtr(key, Encodings.UTF8);
1406+
using var keyPtr = new StrPtr(key);
14061407
return Delegates.PyDict_DelItemString(pointer, keyPtr);
14071408
}
14081409

@@ -1517,7 +1518,7 @@ internal static bool PyIter_Check(BorrowedReference ob)
15171518

15181519
internal static NewReference PyModule_New(string name)
15191520
{
1520-
using var namePtr = new StrPtr(name, Encodings.UTF8);
1521+
using var namePtr = new StrPtr(name);
15211522
return Delegates.PyModule_New(namePtr);
15221523
}
15231524

@@ -1531,7 +1532,7 @@ internal static NewReference PyModule_New(string name)
15311532
/// <returns>Return -1 on error, 0 on success.</returns>
15321533
internal static int PyModule_AddObject(BorrowedReference module, string name, StolenReference value)
15331534
{
1534-
using var namePtr = new StrPtr(name, Encodings.UTF8);
1535+
using var namePtr = new StrPtr(name);
15351536
IntPtr valueAddr = value.DangerousGetAddressOrNull();
15361537
int res = Delegates.PyModule_AddObject(module, namePtr, valueAddr);
15371538
// We can't just exit here because the reference is stolen only on success.
@@ -1549,7 +1550,7 @@ internal static int PyModule_AddObject(BorrowedReference module, string name, St
15491550

15501551
internal static NewReference PyImport_ImportModule(string name)
15511552
{
1552-
using var namePtr = new StrPtr(name, Encodings.UTF8);
1553+
using var namePtr = new StrPtr(name);
15531554
return Delegates.PyImport_ImportModule(namePtr);
15541555
}
15551556

@@ -1558,7 +1559,7 @@ internal static NewReference PyImport_ImportModule(string name)
15581559

15591560
internal static BorrowedReference PyImport_AddModule(string name)
15601561
{
1561-
using var namePtr = new StrPtr(name, Encodings.UTF8);
1562+
using var namePtr = new StrPtr(name);
15621563
return Delegates.PyImport_AddModule(namePtr);
15631564
}
15641565

@@ -1586,13 +1587,13 @@ internal static void PySys_SetArgvEx(int argc, string[] argv, int updatepath)
15861587

15871588
internal static BorrowedReference PySys_GetObject(string name)
15881589
{
1589-
using var namePtr = new StrPtr(name, Encodings.UTF8);
1590+
using var namePtr = new StrPtr(name);
15901591
return Delegates.PySys_GetObject(namePtr);
15911592
}
15921593

15931594
internal static int PySys_SetObject(string name, BorrowedReference ob)
15941595
{
1595-
using var namePtr = new StrPtr(name, Encodings.UTF8);
1596+
using var namePtr = new StrPtr(name);
15961597
return Delegates.PySys_SetObject(namePtr, ob);
15971598
}
15981599

@@ -1691,7 +1692,7 @@ internal static IntPtr PyMem_Malloc(long size)
16911692

16921693
internal static void PyErr_SetString(BorrowedReference ob, string message)
16931694
{
1694-
using var msgPtr = new StrPtr(message, Encodings.UTF8);
1695+
using var msgPtr = new StrPtr(message);
16951696
Delegates.PyErr_SetString(ob, msgPtr);
16961697
}
16971698

0 commit comments

Comments
 (0)