From 80e995fb3bb809d65a6b0b4aea3c17b0f87c609f Mon Sep 17 00:00:00 2001 From: Zyko Date: Sat, 7 Oct 2023 17:01:28 +0200 Subject: [PATCH 01/12] Attempts at moving stuff outside the MakeFunc scope --- func.go | 256 +++++++++++++++++++++++++++++++++++++++++++++++++++ func_test.go | 149 ++++++++++++++++++++++++++++++ 2 files changed, 405 insertions(+) diff --git a/func.go b/func.go index bc9687ca..ecd2dbd5 100644 --- a/func.go +++ b/func.go @@ -24,6 +24,14 @@ func RegisterLibFunc(fptr interface{}, handle uintptr, name string) { RegisterFunc(fptr, sym) } +func RegisterLibFunc2(fptr interface{}, handle uintptr, name string) { + sym, err := loadSymbol(handle, name) + if err != nil { + panic(err) + } + RegisterFunc2(fptr, sym) +} + // RegisterFunc takes a pointer to a Go function representing the calling convention of the C function. // fptr will be set to a function that when called will call the C function given by cfn with the // parameters passed in the correct registers and stack. @@ -284,6 +292,254 @@ func RegisterFunc(fptr interface{}, cfn uintptr) { fn.Set(v) } +func RegisterFunc2(fptr interface{}, cfn uintptr) { + fn := reflect.ValueOf(fptr).Elem() + ty := fn.Type() + if ty.Kind() != reflect.Func { + panic("purego: fptr must be a function pointer") + } + if ty.NumOut() > 1 { + panic("purego: function can only return zero or one values") + } + if cfn == 0 { + panic("purego: cfn is nil") + } + { + // this code checks how many registers and stack this function will use + // to avoid crashing with too many arguments + var ints int + var floats int + var stack int + for i := 0; i < ty.NumIn(); i++ { + arg := ty.In(i) + switch arg.Kind() { + case reflect.String, reflect.Uintptr, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Ptr, reflect.UnsafePointer, reflect.Slice, + reflect.Func, reflect.Bool: + if ints < numOfIntegerRegisters() { + ints++ + } else { + stack++ + } + case reflect.Float32, reflect.Float64: + if floats < numOfFloats { + floats++ + } else { + stack++ + } + default: + panic("purego: unsupported kind " + arg.Kind().String()) + } + } + sizeOfStack := maxArgs - numOfIntegerRegisters() + if stack > sizeOfStack { + panic("purego: too many arguments") + } + } + + var sysargs [maxArgs]uintptr + stack := sysargs[numOfIntegerRegisters():] + var floats [numOfFloats]uintptr + var numInts int + var numFloats int + var numStack int + var addStack, addInt, addFloat func(x uintptr) + if runtime.GOARCH == "arm64" || runtime.GOOS != "windows" { + // Windows arm64 uses the same calling convention as macOS and Linux + addStack = func(x uintptr) { + stack[numStack] = x + numStack++ + } + addInt = func(x uintptr) { + if numInts >= numOfIntegerRegisters() { + addStack(x) + } else { + sysargs[numInts] = x + numInts++ + } + } + addFloat = func(x uintptr) { + if numFloats < len(floats) { + floats[numFloats] = x + numFloats++ + } else { + addStack(x) + } + } + } else { + // On Windows amd64 the arguments are passed in the numbered registered. + // So the first int is in the first integer register and the first float + // is in the second floating register if there is already a first int. + // This is in contrast to how macOS and Linux pass arguments which + // tries to use as many registers as possible in the calling convention. + addStack = func(x uintptr) { + sysargs[numStack] = x + numStack++ + } + addInt = addStack + addFloat = addStack + } + + var keepAlive []interface{} + // Parameters + addFuncs := make([]func(v reflect.Value), ty.NumIn()) + for i := 0; i < ty.NumIn(); i++ { + arg := ty.In(i) + switch arg.Kind() { + case reflect.String: + addFuncs[i] = func(v reflect.Value) { + ptr := strings.CString(v.String()) + keepAlive = append(keepAlive, ptr) + addInt(uintptr(unsafe.Pointer(ptr))) + } + case reflect.Uintptr, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + addFuncs[i] = func(v reflect.Value) { + addInt(uintptr(v.Uint())) + } + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + addFuncs[i] = func(v reflect.Value) { + addInt(uintptr(v.Int())) + } + case reflect.Ptr, reflect.UnsafePointer, reflect.Slice: + // There is no need to keepAlive this pointer separately because it is kept alive in the args variable + addFuncs[i] = func(v reflect.Value) { + addInt(v.Pointer()) + } + case reflect.Func: + addFuncs[i] = func(v reflect.Value) { + addInt(NewCallback(v.Interface())) + } + case reflect.Bool: + addFuncs[i] = func(v reflect.Value) { + if v.Bool() { + addInt(1) + } else { + addInt(0) + } + } + case reflect.Float32: + addFuncs[i] = func(v reflect.Value) { + addFloat(uintptr(math.Float32bits(float32(v.Float())))) + } + case reflect.Float64: + addFuncs[i] = func(v reflect.Value) { + addFloat(uintptr(math.Float64bits(v.Float()))) + } + default: + panic("purego: unsupported kind: " + arg.Kind().String()) + } + } + // Return value + var outFunc func(r1, r2 uintptr) []reflect.Value = func(_, _ uintptr) []reflect.Value { + return nil + } + if ty.NumOut() > 0 { + outType := ty.Out(0) + switch outType.Kind() { + case reflect.Uintptr, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: + outFunc = func(r1, r2 uintptr) []reflect.Value { + v := reflect.New(outType).Elem() + v.SetUint(uint64(r1)) + return []reflect.Value{v} + } + case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: + outFunc = func(r1, r2 uintptr) []reflect.Value { + v := reflect.New(outType).Elem() + v.SetInt(int64(r1)) + return []reflect.Value{v} + } + case reflect.Bool: + outFunc = func(r1, r2 uintptr) []reflect.Value { + v := reflect.New(outType).Elem() + v.SetBool(r1 != 0) + return []reflect.Value{v} + } + case reflect.UnsafePointer: + outFunc = func(r1, r2 uintptr) []reflect.Value { + v := reflect.New(outType).Elem() + // We take the address and then dereference it to trick go vet from creating a possible miss-use of unsafe.Pointer + v.SetPointer(*(*unsafe.Pointer)(unsafe.Pointer(&r1))) + return []reflect.Value{v} + } + case reflect.Ptr: + outFunc = func(r1, r2 uintptr) []reflect.Value { + // It is safe to have the address of r1 not escape because it is immediately dereferenced with .Elem() + v := reflect.NewAt(outType, runtime_noescape(unsafe.Pointer(&r1))).Elem() + return []reflect.Value{v} + } + case reflect.Func: + outFunc = func(r1, r2 uintptr) []reflect.Value { + // wrap this C function in a nicely typed Go function + v := reflect.New(outType) + RegisterFunc(v.Interface(), r1) + return []reflect.Value{v} + } + case reflect.String: + outFunc = func(r1, r2 uintptr) []reflect.Value { + v := reflect.New(outType) + v.SetString(strings.GoString(r1)) + return []reflect.Value{v} + } + case reflect.Float32, reflect.Float64: + outFunc = func(r1, r2 uintptr) []reflect.Value { + v := reflect.New(outType) + // NOTE: r2 is only the floating return value on 64bit platforms. + // On 32bit platforms r2 is the upper part of a 64bit return. + v.SetFloat(math.Float64frombits(uint64(r2))) + return []reflect.Value{v} + } + default: + panic("purego: unsupported return kind: " + outType.Kind().String()) + } + } + + v := reflect.MakeFunc(ty, func(args []reflect.Value) (results []reflect.Value) { + if len(args) > 0 { + if variadic, ok := args[len(args)-1].Interface().([]interface{}); ok { + // subtract one from args bc the last argument in args is []interface{} + // which we are currently expanding + tmp := make([]reflect.Value, len(args)-1+len(variadic)) + n := copy(tmp, args[:len(args)-1]) + for i, v := range variadic { + tmp[n+i] = reflect.ValueOf(v) + } + args = tmp + } + } + // Reset stack (Registers? wording) + numInts = 0 + numFloats = 0 + numStack = 0 + //keepAlive = nil + defer func() { + runtime.KeepAlive(keepAlive) + runtime.KeepAlive(args) + }() + for i, v := range args { + addFuncs[i](v) + } + // TODO: support structs + var r1, r2 uintptr + if runtime.GOARCH == "arm64" || runtime.GOOS != "windows" { + // Use the normal arm64 calling convention even on Windows + syscall := syscall9Args{ + cfn, + sysargs[0], sysargs[1], sysargs[2], sysargs[3], sysargs[4], sysargs[5], sysargs[6], sysargs[7], sysargs[8], + floats[0], floats[1], floats[2], floats[3], floats[4], floats[5], floats[6], floats[7], + 0, 0, 0, + } + runtime_cgocall(syscall9XABI0, unsafe.Pointer(&syscall)) + r1, r2 = syscall.r1, syscall.r2 + } else { + // This is a fallback for Windows amd64, 386, and arm. Note this may not support floats + r1, r2, _ = syscall_syscall9X(cfn, sysargs[0], sysargs[1], sysargs[2], sysargs[3], sysargs[4], sysargs[5], sysargs[6], sysargs[7], sysargs[8]) + } + + return outFunc(r1, r2) + }) + fn.Set(v) +} + func numOfIntegerRegisters() int { switch runtime.GOARCH { case "arm64": diff --git a/func_test.go b/func_test.go index 11dd07f8..fb0808bf 100644 --- a/func_test.go +++ b/func_test.go @@ -86,3 +86,152 @@ func Test_qsort(t *testing.T) { } } } + +// BenchmarkRegisterFuncQsort-16 558045 2064 ns/op 264 B/op 6 allocs/op +func BenchmarkRegisterFuncQsort(b *testing.B) { + library, err := getSystemLibrary() + if err != nil { + b.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + b.Errorf("failed to dlopen: %s", err) + } + + data := []int{88, 56, 100, 2, 25} + compare := func(a, b *int) int { + return *a - *b + } + var qsort func(data []int, nitms uintptr, size uintptr, compar func(a, b *int) int) + purego.RegisterLibFunc(&qsort, libc, "qsort") + b.ResetTimer() + for n := 0; n < b.N; n++ { + qsort(data, uintptr(len(data)), unsafe.Sizeof(int(0)), compare) + } +} + +// BenchmarkRegisterFuncStrlen-16 2411634 490.4 ns/op 120 B/op 6 allocs/op +func BenchmarkRegisterFuncStrlen(b *testing.B) { + library, err := getSystemLibrary() + if err != nil { + b.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + b.Errorf("failed to dlopen: %s", err) + } + var strlen func(string) int + purego.RegisterLibFunc(&strlen, libc, "strlen") + b.ResetTimer() + for n := 0; n < b.N; n++ { + strlen("abcdefghijklmnopqrstuvwxyz") + } +} + +// v2 + +func Test2RegisterFuncPuts(t *testing.T) { + library, err := getSystemLibrary() + if err != nil { + t.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + t.Errorf("failed to dlopen: %s", err) + } + var puts func(string) + purego.RegisterLibFunc2(&puts, libc, "puts") + puts("Calling C from from Go without Cgo! 2") + puts("Calling C from from Go without Cgo! 3") + puts("Calling C from from Go without Cgo! 4") +} + +func Test2_strlen(t *testing.T) { + library, err := getSystemLibrary() + if err != nil { + t.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + t.Errorf("failed to dlopen: %s", err) + } + var strlen func(string) int + purego.RegisterLibFunc2(&strlen, libc, "strlen") + count := strlen("abcdefghijklmnopqrstuvwxyz") + if count != 26 { + t.Errorf("strlen(0): expected 26 but got %d", count) + } + count = strlen("abcdefghijklmnopqrstuvwxyz") + if count != 26 { + t.Errorf("strlen(1): expected 26 but got %d", count) + } +} + +func Test2_qsort(t *testing.T) { + library, err := getSystemLibrary() + if err != nil { + t.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + t.Errorf("failed to dlopen: %s", err) + } + + data := []int{88, 56, 100, 2, 25} + sorted := []int{2, 25, 56, 88, 100} + compare := func(a, b *int) int { + return *a - *b + } + var qsort func(data []int, nitms uintptr, size uintptr, compar func(a, b *int) int) + purego.RegisterLibFunc2(&qsort, libc, "qsort") + qsort(data, uintptr(len(data)), unsafe.Sizeof(int(0)), compare) + qsort(data, uintptr(len(data)), unsafe.Sizeof(int(0)), compare) + qsort(data, uintptr(len(data)), unsafe.Sizeof(int(0)), compare) + qsort(data, uintptr(len(data)), unsafe.Sizeof(int(0)), compare) + for i := range data { + if data[i] != sorted[i] { + t.Errorf("got %d wanted %d at %d", data[i], sorted[i], i) + } + } +} + +// Benchmark2RegisterFuncQsort-16 558032 2057 ns/op 264 B/op 6 allocs/op +func Benchmark2RegisterFuncQsort(b *testing.B) { + library, err := getSystemLibrary() + if err != nil { + b.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + b.Errorf("failed to dlopen: %s", err) + } + + data := []int{88, 56, 100, 2, 25} + compare := func(a, b *int) int { + return *a - *b + } + var qsort func(data []int, nitms uintptr, size uintptr, compar func(a, b *int) int) + purego.RegisterLibFunc2(&qsort, libc, "qsort") + b.ResetTimer() + for n := 0; n < b.N; n++ { + qsort(data, uintptr(len(data)), unsafe.Sizeof(int(0)), compare) + } +} + +// Benchmark2RegisterFuncStrlen-16 2502175 461.1 ns/op 190 B/op 5 allocs/op +func Benchmark2RegisterFuncStrlen(b *testing.B) { + library, err := getSystemLibrary() + if err != nil { + b.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + b.Errorf("failed to dlopen: %s", err) + } + var strlen func(string) int + purego.RegisterLibFunc2(&strlen, libc, "strlen") + b.ResetTimer() + for n := 0; n < b.N; n++ { + strlen("abcdefghijklmnopqrstuvwxyz") + } +} From ca13512e21c291f32e3500ffaa989bb15dcaf784 Mon Sep 17 00:00:00 2001 From: Zyko Date: Sun, 8 Oct 2023 13:13:22 +0200 Subject: [PATCH 02/12] poc generic / less reflection --- func.go | 802 ++++++++++++++++++++++++++++++++++++--------------- func_test.go | 412 +++++++++++++++----------- 2 files changed, 823 insertions(+), 391 deletions(-) diff --git a/func.go b/func.go index ecd2dbd5..76035095 100644 --- a/func.go +++ b/func.go @@ -24,12 +24,12 @@ func RegisterLibFunc(fptr interface{}, handle uintptr, name string) { RegisterFunc(fptr, sym) } -func RegisterLibFunc2(fptr interface{}, handle uintptr, name string) { +func Symbol(handle uintptr, name string) uintptr { sym, err := loadSymbol(handle, name) if err != nil { panic(err) } - RegisterFunc2(fptr, sym) + return sym } // RegisterFunc takes a pointer to a Go function representing the calling convention of the C function. @@ -206,16 +206,16 @@ func RegisterFunc(fptr interface{}, cfn uintptr) { addFloat = addStack } - var keepAlive []interface{} - defer func() { + //var keepAlive []interface{} + /*defer func() { runtime.KeepAlive(keepAlive) runtime.KeepAlive(args) - }() + }()*/ for _, v := range args { switch v.Kind() { case reflect.String: ptr := strings.CString(v.String()) - keepAlive = append(keepAlive, ptr) + //keepAlive = append(keepAlive, ptr) addInt(uintptr(unsafe.Pointer(ptr))) case reflect.Uintptr, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: addInt(uintptr(v.Uint())) @@ -292,252 +292,594 @@ func RegisterFunc(fptr interface{}, cfn uintptr) { fn.Set(v) } -func RegisterFunc2(fptr interface{}, cfn uintptr) { - fn := reflect.ValueOf(fptr).Elem() - ty := fn.Type() - if ty.Kind() != reflect.Func { - panic("purego: fptr must be a function pointer") +// WIP: Less reflection below + +type syscallStackArm64NoWin [1 + maxArgs + numOfFloats]uintptr + +func (ss *syscallStackArm64NoWin) numStack() uintptr { + return ss[0] & 0b1111 +} + +func (ss *syscallStackArm64NoWin) numInts() uintptr { + return (ss[0] >> 4) & 0b1111 +} + +func (ss *syscallStackArm64NoWin) numFloats() uintptr { + return (ss[0] >> 8) & 0b1111 +} + +func (ss *syscallStackArm64NoWin) addStack(x uintptr) { + n := ss.numStack() + ss[1+n] = x + ss[0] = (ss[0] - n) | (n + 1) +} + +func (ss *syscallStackArm64NoWin) addInt(x uintptr) { + n := ss.numInts() + if int(n) >= numOfIntegerRegisters() { + ss.addStack(x) + } else { + ss[1+n] = x + ss[0] = ss[0] - n | ((n + 1) << 4) } - if ty.NumOut() > 1 { - panic("purego: function can only return zero or one values") +} + +func (ss *syscallStackArm64NoWin) addFloat(x uintptr) { + n := ss.numFloats() + if int(n) < numOfFloats { + ss[1+maxArgs+n] = x + ss[0] = ss[0] - n | ((n + 1) << 8) + } else { + ss.addStack(x) } - if cfn == 0 { - panic("purego: cfn is nil") +} + +func (ss *syscallStackArm64NoWin) SysArgs() []uintptr { + return ss[1:] +} + +func (ss *syscallStackArm64NoWin) Floats() []uintptr { + return ss[1+maxArgs:] +} + +type syscallStackAmd64OrWin syscallStackArm64NoWin + +func (ss *syscallStackAmd64OrWin) numStack() uintptr { + return ss[0] & 0b1111 +} + +func (ss *syscallStackAmd64OrWin) numInts() uintptr { + return (ss[0] >> 4) & 0b1111 +} + +func (ss *syscallStackAmd64OrWin) numFloats() uintptr { + return (ss[0] >> 8) & 0b1111 +} + +func (ss *syscallStackAmd64OrWin) addStack(x uintptr) { + n := ss.numStack() + ss[1+n] = x + ss[0] = (ss[0] - n) | (n + 1) +} + +func (ss *syscallStackAmd64OrWin) addInt(x uintptr) { + ss.addStack(x) +} + +func (ss *syscallStackAmd64OrWin) addFloat(x uintptr) { + ss.addStack(x) +} + +func (ss *syscallStackAmd64OrWin) SysArgs() []uintptr { + return ss[1:] +} + +func (ss *syscallStackAmd64OrWin) Floats() []uintptr { + return ss[1+maxArgs:] +} + +type syscallStack interface { + SysArgs() []uintptr + Floats() []uintptr + + addStack(x uintptr) + addInt(x uintptr) + addFloat(x uintptr) +} + +func newSyscallStack() syscallStack { + if runtime.GOARCH == "arm64" || runtime.GOOS != "windows" { + return &syscallStackArm64NoWin{} } - { - // this code checks how many registers and stack this function will use - // to avoid crashing with too many arguments - var ints int - var floats int - var stack int - for i := 0; i < ty.NumIn(); i++ { - arg := ty.In(i) - switch arg.Kind() { - case reflect.String, reflect.Uintptr, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, - reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Ptr, reflect.UnsafePointer, reflect.Slice, - reflect.Func, reflect.Bool: - if ints < numOfIntegerRegisters() { - ints++ - } else { - stack++ - } - case reflect.Float32, reflect.Float64: - if floats < numOfFloats { - floats++ - } else { - stack++ - } - default: - panic("purego: unsupported kind " + arg.Kind().String()) - } + return &syscallStackAmd64OrWin{} +} + +func uintToPtr[T ~uint | ~uint8 | ~uint16 | ~uint32 | ~uint64 | ~uintptr](v T) uintptr { + return uintptr(uint64(v)) +} + +func intToPtr[T ~int | ~int8 | ~int16 | ~int32 | ~int64](v T) uintptr { + return uintptr(int64(v)) +} + +func getAddFunc[T any]() func(syscallStack, T) { + // TODO: support structs + var v T + switch any(v).(type) { + case int: + return func(r syscallStack, x T) { + r.addInt(intToPtr(any(x).(int))) } - sizeOfStack := maxArgs - numOfIntegerRegisters() - if stack > sizeOfStack { - panic("purego: too many arguments") + case int8: + return func(r syscallStack, x T) { + r.addInt(intToPtr(any(x).(int8))) } - } - - var sysargs [maxArgs]uintptr - stack := sysargs[numOfIntegerRegisters():] - var floats [numOfFloats]uintptr - var numInts int - var numFloats int - var numStack int - var addStack, addInt, addFloat func(x uintptr) - if runtime.GOARCH == "arm64" || runtime.GOOS != "windows" { - // Windows arm64 uses the same calling convention as macOS and Linux - addStack = func(x uintptr) { - stack[numStack] = x - numStack++ + case int16: + return func(r syscallStack, x T) { + r.addInt(intToPtr(any(x).(int16))) + } + case int32: + return func(r syscallStack, x T) { + r.addInt(intToPtr(any(x).(int32))) + } + case int64: + return func(r syscallStack, x T) { + r.addInt(intToPtr(any(x).(int64))) + } + case uint: + return func(r syscallStack, x T) { + r.addInt(uintToPtr(any(x).(uint))) + } + case uint8: + return func(r syscallStack, x T) { + r.addInt(uintToPtr(any(x).(uint8))) + } + case uint16: + return func(r syscallStack, x T) { + r.addInt(uintToPtr(any(x).(uint16))) + } + case uint32: + return func(r syscallStack, x T) { + r.addInt(uintToPtr(any(x).(uint32))) + } + case uint64: + return func(r syscallStack, x T) { + r.addInt(uintToPtr(any(x).(uint64))) + } + case uintptr: + return func(r syscallStack, x T) { + r.addInt(uintToPtr(any(x).(uintptr))) + } + case float32: + return func(r syscallStack, x T) { + r.addFloat(uintptr(math.Float32bits(any(x).(float32)))) } - addInt = func(x uintptr) { - if numInts >= numOfIntegerRegisters() { - addStack(x) + case float64: + return func(r syscallStack, x T) { + r.addFloat(uintptr(math.Float64bits(any(x).(float64)))) + } + case bool: + return func(r syscallStack, x T) { + if any(x).(bool) { + r.addInt(1) } else { - sysargs[numInts] = x - numInts++ + r.addInt(0) } } - addFloat = func(x uintptr) { - if numFloats < len(floats) { - floats[numFloats] = x - numFloats++ - } else { - addStack(x) + case string: + return func(r syscallStack, x T) { + ptr := strings.CString(any(x).(string)) + r.addInt(uintptr(unsafe.Pointer(ptr))) + } + + default: + return func(r syscallStack, x T) { + rv := reflect.ValueOf(x) + switch rv.Kind() { + case reflect.Ptr, reflect.UnsafePointer, reflect.Slice: + // There is no need to keepAlive this pointer separately because it is kept alive in the args variable + r.addInt(rv.Pointer()) + case reflect.Func: + r.addInt(NewCallback(rv.Interface())) + default: + panic("purego: unsupported kind: " + rv.Kind().String()) } } - } else { - // On Windows amd64 the arguments are passed in the numbered registered. - // So the first int is in the first integer register and the first float - // is in the second floating register if there is already a first int. - // This is in contrast to how macOS and Linux pass arguments which - // tries to use as many registers as possible in the calling convention. - addStack = func(x uintptr) { - sysargs[numStack] = x - numStack++ + } +} + +func retInts[T int | int8 | int16 | int32 | int64 | uint | uint8 | uint16 | uint32 | uint64 | uintptr, V any](r1, r2 uintptr) V { + return any(T(r1)).(V) +} + +func retBool[V any](r1, r2 uintptr) V { + return any(r1 != 0).(V) +} + +func getReturnFunc[T any]() func(r1, r2 uintptr) T { + var v T + switch any(v).(type) { + case int: + return retInts[int, T] + case int8: + return retInts[int8, T] + case int16: + return retInts[int16, T] + case int32: + return retInts[int32, T] + case int64: + return retInts[int64, T] + case uint: + return retInts[uint, T] + case uint8: + return retInts[uint8, T] + case uint16: + return retInts[uint16, T] + case uint32: + return retInts[uint32, T] + case uint64: + return retInts[uint64, T] + case uintptr: + return retInts[uintptr, T] + case float32: + return func(r1, r2 uintptr) T { + return any(math.Float32frombits(uint32(r2))).(T) + } + case float64: + return func(r1, r2 uintptr) T { + return any(math.Float64frombits(uint64(r2))).(T) + } + case bool: + return retBool[T] + case string: + return func(r1, r2 uintptr) T { + return any(strings.GoString(r1)).(T) + } + case unsafe.Pointer: + // We take the address and then dereference it to trick go vet from creating a possible miss-use of unsafe.Pointer + return func(r1, r2 uintptr) T { + return any(*(*unsafe.Pointer)(unsafe.Pointer(&r1))).(T) } - addInt = addStack - addFloat = addStack + // Note: funcs and ptrs handled via reflect + default: + // TODO: below + /*u := reflect.ValueOf(v) + switch v.Elem().Type().Kind() { + case reflect.Ptr: + // It is safe to have the address of r1 not escape because it is immediately dereferenced with .Elem() + v.Set(reflect.NewAt(v.Type(), runtime_noescape(unsafe.Pointer(&r1))).Elem()) + case reflect.Func: + // wrap this C function in a nicely typed Go function + fv := reflect.New(v.Type()) + // Note: cannot use a generic one unfortunately + RegisterFunc(fv.Interface(), r1) + v.Set(fv) + default: + panic("purego: unsupported return kind: " + v.Type().Kind().String()) + }*/ } - var keepAlive []interface{} - // Parameters - addFuncs := make([]func(v reflect.Value), ty.NumIn()) + return nil +} + +func argsCheck(ty reflect.Type, cfn uintptr) { + if cfn == 0 { + panic("purego: cfn is nil") + } + // this code checks how many registers and stack this function will use + // to avoid crashing with too many arguments + var ints, floats, stack int for i := 0; i < ty.NumIn(); i++ { arg := ty.In(i) switch arg.Kind() { - case reflect.String: - addFuncs[i] = func(v reflect.Value) { - ptr := strings.CString(v.String()) - keepAlive = append(keepAlive, ptr) - addInt(uintptr(unsafe.Pointer(ptr))) - } - case reflect.Uintptr, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: - addFuncs[i] = func(v reflect.Value) { - addInt(uintptr(v.Uint())) - } - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - addFuncs[i] = func(v reflect.Value) { - addInt(uintptr(v.Int())) - } - case reflect.Ptr, reflect.UnsafePointer, reflect.Slice: - // There is no need to keepAlive this pointer separately because it is kept alive in the args variable - addFuncs[i] = func(v reflect.Value) { - addInt(v.Pointer()) - } - case reflect.Func: - addFuncs[i] = func(v reflect.Value) { - addInt(NewCallback(v.Interface())) - } - case reflect.Bool: - addFuncs[i] = func(v reflect.Value) { - if v.Bool() { - addInt(1) - } else { - addInt(0) - } - } - case reflect.Float32: - addFuncs[i] = func(v reflect.Value) { - addFloat(uintptr(math.Float32bits(float32(v.Float())))) + case reflect.String, reflect.Uintptr, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, + reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64, reflect.Ptr, reflect.UnsafePointer, reflect.Slice, + reflect.Func, reflect.Bool: + if ints < numOfIntegerRegisters() { + ints++ + } else { + stack++ } - case reflect.Float64: - addFuncs[i] = func(v reflect.Value) { - addFloat(uintptr(math.Float64bits(v.Float()))) + case reflect.Float32, reflect.Float64: + if floats < numOfFloats { + floats++ + } else { + stack++ } default: - panic("purego: unsupported kind: " + arg.Kind().String()) + panic("purego: unsupported kind " + arg.Kind().String()) } } - // Return value - var outFunc func(r1, r2 uintptr) []reflect.Value = func(_, _ uintptr) []reflect.Value { - return nil + sizeOfStack := maxArgs - numOfIntegerRegisters() + if stack > sizeOfStack { + panic("purego: too many arguments") } - if ty.NumOut() > 0 { - outType := ty.Out(0) - switch outType.Kind() { - case reflect.Uintptr, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: - outFunc = func(r1, r2 uintptr) []reflect.Value { - v := reflect.New(outType).Elem() - v.SetUint(uint64(r1)) - return []reflect.Value{v} - } - case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: - outFunc = func(r1, r2 uintptr) []reflect.Value { - v := reflect.New(outType).Elem() - v.SetInt(int64(r1)) - return []reflect.Value{v} - } - case reflect.Bool: - outFunc = func(r1, r2 uintptr) []reflect.Value { - v := reflect.New(outType).Elem() - v.SetBool(r1 != 0) - return []reflect.Value{v} - } - case reflect.UnsafePointer: - outFunc = func(r1, r2 uintptr) []reflect.Value { - v := reflect.New(outType).Elem() - // We take the address and then dereference it to trick go vet from creating a possible miss-use of unsafe.Pointer - v.SetPointer(*(*unsafe.Pointer)(unsafe.Pointer(&r1))) - return []reflect.Value{v} - } - case reflect.Ptr: - outFunc = func(r1, r2 uintptr) []reflect.Value { - // It is safe to have the address of r1 not escape because it is immediately dereferenced with .Elem() - v := reflect.NewAt(outType, runtime_noescape(unsafe.Pointer(&r1))).Elem() - return []reflect.Value{v} - } - case reflect.Func: - outFunc = func(r1, r2 uintptr) []reflect.Value { - // wrap this C function in a nicely typed Go function - v := reflect.New(outType) - RegisterFunc(v.Interface(), r1) - return []reflect.Value{v} - } - case reflect.String: - outFunc = func(r1, r2 uintptr) []reflect.Value { - v := reflect.New(outType) - v.SetString(strings.GoString(r1)) - return []reflect.Value{v} - } - case reflect.Float32, reflect.Float64: - outFunc = func(r1, r2 uintptr) []reflect.Value { - v := reflect.New(outType) - // NOTE: r2 is only the floating return value on 64bit platforms. - // On 32bit platforms r2 is the upper part of a 64bit return. - v.SetFloat(math.Float64frombits(uint64(r2))) - return []reflect.Value{v} - } - default: - panic("purego: unsupported return kind: " + outType.Kind().String()) +} + +// Note: Can be removed, just to benchmark direct syscalls to compare, should not be exposed +func Syscall9(cfn uintptr, sysargs [maxArgs]uintptr) (uintptr, uintptr) { + r1, r2, _ := syscall_syscall9X(cfn, sysargs[0], sysargs[1], sysargs[2], sysargs[3], sysargs[4], sysargs[5], sysargs[6], sysargs[7], sysargs[8]) + return r1, r2 +} + +// Convenience to avoid code repetition in all instances of RegisterFuncI_O +func runtime_call(reg syscallStack, cfn uintptr) (uintptr, uintptr) { + var r1, r2 uintptr + sysargs, floats := reg.SysArgs(), reg.Floats() + if runtime.GOARCH == "arm64" || runtime.GOOS != "windows" { + // Use the normal arm64 calling convention even on Windows + syscall := syscall9Args{ + cfn, + sysargs[0], sysargs[1], sysargs[2], sysargs[3], sysargs[4], sysargs[5], sysargs[6], sysargs[7], sysargs[8], + floats[0], floats[1], floats[2], floats[3], floats[4], floats[5], floats[6], floats[7], + 0, 0, 0, } + runtime_cgocall(syscall9XABI0, unsafe.Pointer(&syscall)) + r1, r2 = syscall.r1, syscall.r2 + } else { + // This is a fallback for Windows amd64, 386, and arm. Note this may not support floats + r1, r2, _ = syscall_syscall9X(cfn, sysargs[0], sysargs[1], sysargs[2], sysargs[3], sysargs[4], sysargs[5], sysargs[6], sysargs[7], sysargs[8]) } - v := reflect.MakeFunc(ty, func(args []reflect.Value) (results []reflect.Value) { - if len(args) > 0 { - if variadic, ok := args[len(args)-1].Interface().([]interface{}); ok { - // subtract one from args bc the last argument in args is []interface{} - // which we are currently expanding - tmp := make([]reflect.Value, len(args)-1+len(variadic)) - n := copy(tmp, args[:len(args)-1]) - for i, v := range variadic { - tmp[n+i] = reflect.ValueOf(v) - } - args = tmp - } - } - // Reset stack (Registers? wording) - numInts = 0 - numFloats = 0 - numStack = 0 - //keepAlive = nil - defer func() { - runtime.KeepAlive(keepAlive) - runtime.KeepAlive(args) - }() - for i, v := range args { - addFuncs[i](v) - } - // TODO: support structs - var r1, r2 uintptr - if runtime.GOARCH == "arm64" || runtime.GOOS != "windows" { - // Use the normal arm64 calling convention even on Windows - syscall := syscall9Args{ - cfn, - sysargs[0], sysargs[1], sysargs[2], sysargs[3], sysargs[4], sysargs[5], sysargs[6], sysargs[7], sysargs[8], - floats[0], floats[1], floats[2], floats[3], floats[4], floats[5], floats[6], floats[7], - 0, 0, 0, - } - runtime_cgocall(syscall9XABI0, unsafe.Pointer(&syscall)) - r1, r2 = syscall.r1, syscall.r2 - } else { - // This is a fallback for Windows amd64, 386, and arm. Note this may not support floats - r1, r2, _ = syscall_syscall9X(cfn, sysargs[0], sysargs[1], sysargs[2], sysargs[3], sysargs[4], sysargs[5], sysargs[6], sysargs[7], sysargs[8]) - } + return r1, r2 +} - return outFunc(r1, r2) - }) - fn.Set(v) +// No return value + +func RegisterFunc1_0[I0 any](fptr *func(I0), cfn uintptr) { + ty := reflect.ValueOf(fptr).Elem().Type() + // Prevent too many registers and check func address is okay + argsCheck(ty, cfn) + func0 := getAddFunc[I0]() + // Create new function + *fptr = func(i0 I0) { + // Create new syscall stack + reg := newSyscallStack() + // Add inputs in registers + func0(reg, i0) + // Function call + runtime_call(reg, cfn) + } +} + +func RegisterFunc2_0[I0, I1 any](fptr *func(I0, I1), cfn uintptr) { + ty := reflect.ValueOf(fptr).Elem().Type() + // Prevent too many registers and check func address is okay + argsCheck(ty, cfn) + func0 := getAddFunc[I0]() + func1 := getAddFunc[I1]() + // Create new function + *fptr = func(i0 I0, i1 I1) { + // Create new syscall stack + reg := newSyscallStack() + // Add inputs in registers + func0(reg, i0) + func1(reg, i1) + // Function call + runtime_call(reg, cfn) + } +} + +func RegisterFunc3_0[I0, I1, I2 any](fptr *func(I0, I1, I2), cfn uintptr) { + ty := reflect.ValueOf(fptr).Elem().Type() + // Prevent too many registers and check func address is okay + argsCheck(ty, cfn) + func0 := getAddFunc[I0]() + func1 := getAddFunc[I1]() + func2 := getAddFunc[I2]() + // Create new function + *fptr = func(i0 I0, i1 I1, i2 I2) { + // Create new syscall stack + reg := newSyscallStack() + // Add inputs in registers + func0(reg, i0) + func1(reg, i1) + func2(reg, i2) + // Function call + runtime_call(reg, cfn) + } +} + +func RegisterFunc4_0[I0, I1, I2, I3 any](fptr *func(I0, I1, I2, I3), cfn uintptr) { + ty := reflect.ValueOf(fptr).Elem().Type() + // Prevent too many registers and check func address is okay + argsCheck(ty, cfn) + func0 := getAddFunc[I0]() + func1 := getAddFunc[I1]() + func2 := getAddFunc[I2]() + func3 := getAddFunc[I3]() + // Create new function + *fptr = func(i0 I0, i1 I1, i2 I2, i3 I3) { + // Create new syscall stack + reg := newSyscallStack() + // Add inputs in registers + func0(reg, i0) + func1(reg, i1) + func2(reg, i2) + func3(reg, i3) + // Function call + runtime_call(reg, cfn) + } +} + +func RegisterFunc5_0[I0, I1, I2, I3, I4 any](fptr *func(I0, I1, I2, I3, I4), cfn uintptr) { + ty := reflect.ValueOf(fptr).Elem().Type() + // Prevent too many registers and check func address is okay + argsCheck(ty, cfn) + func0 := getAddFunc[I0]() + func1 := getAddFunc[I1]() + func2 := getAddFunc[I2]() + func3 := getAddFunc[I3]() + func4 := getAddFunc[I4]() + // Create new function + *fptr = func(i0 I0, i1 I1, i2 I2, i3 I3, i4 I4) { + // Create new syscall stack + reg := newSyscallStack() + // Add inputs in registers + func0(reg, i0) + func1(reg, i1) + func2(reg, i2) + func3(reg, i3) + func4(reg, i4) + // Function call + runtime_call(reg, cfn) + } +} + +// .. so on + +// 1 return value + +func RegisterFunc1_1[I0, O any](fptr *func(I0) O, cfn uintptr) { + ty := reflect.ValueOf(fptr).Elem().Type() + // Prevent too many registers and check func address is okay + argsCheck(ty, cfn) + returnFunc := getReturnFunc[O]() + func0 := getAddFunc[I0]() + // Create new function + *fptr = func(i0 I0) (out O) { + // Create new syscall stack + reg := newSyscallStack() + // Add inputs in registers + func0(reg, i0) + // Function call + r1, r2 := runtime_call(reg, cfn) + + return returnFunc(r1, r2) + } +} + +func RegisterFunc2_1[I0, I1, O any](fptr *func(I0, I1) O, cfn uintptr) { + ty := reflect.ValueOf(fptr).Elem().Type() + // Prevent too many registers and check func address is okay + argsCheck(ty, cfn) + returnFunc := getReturnFunc[O]() + func0 := getAddFunc[I0]() + func1 := getAddFunc[I1]() + // Create new function + *fptr = func(i0 I0, i1 I1) (out O) { + // Create new syscall stack + reg := newSyscallStack() + // Add inputs in registers + func0(reg, i0) + func1(reg, i1) + // Function call + r1, r2 := runtime_call(reg, cfn) + + return returnFunc(r1, r2) + } +} + +func RegisterFunc3_1[I0, I1, I2, O any](fptr *func(I0, I1, I2) O, cfn uintptr) { + ty := reflect.ValueOf(fptr).Elem().Type() + // Prevent too many registers and check func address is okay + argsCheck(ty, cfn) + returnFunc := getReturnFunc[O]() + func0 := getAddFunc[I0]() + func1 := getAddFunc[I1]() + func2 := getAddFunc[I2]() + // Create new function + *fptr = func(i0 I0, i1 I1, i2 I2) (out O) { + // Create new syscall stack + reg := newSyscallStack() + // Add inputs in registers + func0(reg, i0) + func1(reg, i1) + func2(reg, i2) + // Function call + r1, r2 := runtime_call(reg, cfn) + + return returnFunc(r1, r2) + } +} + +func RegisterFunc4_1[I0, I1, I2, I3, O any](fptr *func(I0, I1, I2, I3) O, cfn uintptr) { + ty := reflect.ValueOf(fptr).Elem().Type() + // Prevent too many registers and check func address is okay + argsCheck(ty, cfn) + returnFunc := getReturnFunc[O]() + func0 := getAddFunc[I0]() + func1 := getAddFunc[I1]() + func2 := getAddFunc[I2]() + func3 := getAddFunc[I3]() + // Create new function + *fptr = func(i0 I0, i1 I1, i2 I2, i3 I3) (out O) { + // Create new syscall stack + reg := newSyscallStack() + // Add inputs in registers + func0(reg, i0) + func1(reg, i1) + func2(reg, i2) + func3(reg, i3) + // Function call + r1, r2 := runtime_call(reg, cfn) + + return returnFunc(r1, r2) + } +} + +func RegisterFunc5_1[I0, I1, I2, I3, I4, O any](fptr *func(I0, I1, I2, I3, I4) O, cfn uintptr) { + ty := reflect.ValueOf(fptr).Elem().Type() + // Prevent too many registers and check func address is okay + argsCheck(ty, cfn) + returnFunc := getReturnFunc[O]() + func0 := getAddFunc[I0]() + func1 := getAddFunc[I1]() + func2 := getAddFunc[I2]() + func3 := getAddFunc[I3]() + func4 := getAddFunc[I4]() + // Create new function + *fptr = func(i0 I0, i1 I1, i2 I2, i3 I3, i4 I4) (out O) { + // Create new syscall stack + reg := newSyscallStack() + // Add inputs in registers + func0(reg, i0) + func1(reg, i1) + func2(reg, i2) + func3(reg, i3) + func4(reg, i4) + // Function call + r1, r2 := runtime_call(reg, cfn) + + return returnFunc(r1, r2) + } +} + +// TODO: missing 6-8 + +func RegisterFunc9_1[I0, I1, I2, I3, I4, I5, I6, I7, I8, O any](fptr *func(I0, I1, I2, I3, I4, I5, I6, I7, I8) O, cfn uintptr) { + ty := reflect.ValueOf(fptr).Elem().Type() + // Prevent too many registers and check func address is okay + argsCheck(ty, cfn) + returnFunc := getReturnFunc[O]() + func0 := getAddFunc[I0]() + func1 := getAddFunc[I1]() + func2 := getAddFunc[I2]() + func3 := getAddFunc[I3]() + func4 := getAddFunc[I4]() + func5 := getAddFunc[I5]() + func6 := getAddFunc[I6]() + func7 := getAddFunc[I7]() + func8 := getAddFunc[I8]() + // Create new function + *fptr = func(i0 I0, i1 I1, i2 I2, i3 I3, i4 I4, i5 I5, i6 I6, i7 I7, i8 I8) (out O) { + // Create new syscall stack + reg := newSyscallStack() + // Add inputs in registers + func0(reg, i0) + func1(reg, i1) + func2(reg, i2) + func3(reg, i3) + func4(reg, i4) + func5(reg, i5) + func6(reg, i6) + func7(reg, i7) + func8(reg, i8) + // Function call + r1, r2 := runtime_call(reg, cfn) + + return returnFunc(r1, r2) + } } func numOfIntegerRegisters() int { @@ -546,11 +888,11 @@ func numOfIntegerRegisters() int { return 8 case "amd64": return 6 - // TODO: figure out why 386 tests are not working - /*case "386": - return 0 - case "arm": - return 4*/ + // TODO: figure out why 386 tests are not working + /*case "386": + return 0 + case "arm": + return 4*/ default: panic("purego: unknown GOARCH (" + runtime.GOARCH + ")") } diff --git a/func_test.go b/func_test.go index fb0808bf..3c35729f 100644 --- a/func_test.go +++ b/func_test.go @@ -10,6 +10,7 @@ import ( "unsafe" "github.com/ebitengine/purego" + "github.com/ebitengine/purego/internal/strings" ) // This is an internal OS-dependent function for getting the handle to a library @@ -32,120 +33,216 @@ func getSystemLibrary() (string, error) { } } -func TestRegisterFunc(t *testing.T) { - library, err := getSystemLibrary() - if err != nil { - t.Errorf("couldn't get system library: %s", err) - } - libc, err := openLibrary(library) - if err != nil { - t.Errorf("failed to dlopen: %s", err) - } - var puts func(string) - purego.RegisterLibFunc(&puts, libc, "puts") - puts("Calling C from from Go without Cgo!") -} +// NewCallBack -func ExampleNewCallback() { - cb := purego.NewCallback(func(a1, a2, a3, a4, a5, a6, a7, a8, a9 int) int { - fmt.Println(a1, a2, a3, a4, a5, a6, a7, a8, a9) - return a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + a9 +func Test_NewCallBack(t *testing.T) { + // Original + t.Run("RegisterFunc(original)", func(t *testing.T) { + cb := purego.NewCallback(func(a1, a2, a3, a4, a5, a6, a7, a8, a9 int) int { + fmt.Println(a1, a2, a3, a4, a5, a6, a7, a8, a9) + return a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + a9 + }) + + var fn func(a1, a2, a3, a4, a5, a6, a7, a8, a9 int) int + purego.RegisterFunc(&fn, cb) + + ret := fn(1, 2, 3, 4, 5, 6, 7, 8, 9) + fmt.Println(ret) + + // Output: 1 2 3 4 5 6 7 8 9 + // 45 }) + // New + t.Run("RegisterFunc9_1", func(t *testing.T) { + cb := purego.NewCallback(func(a1, a2, a3, a4, a5, a6, a7, a8, a9 int) int { + fmt.Println(a1, a2, a3, a4, a5, a6, a7, a8, a9) + return a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + a9 + }) - var fn func(a1, a2, a3, a4, a5, a6, a7, a8, a9 int) int - purego.RegisterFunc(&fn, cb) + var fn func(a1, a2, a3, a4, a5, a6, a7, a8, a9 int) int + purego.RegisterFunc9_1(&fn, cb) - ret := fn(1, 2, 3, 4, 5, 6, 7, 8, 9) - fmt.Println(ret) + ret := fn(1, 2, 3, 4, 5, 6, 7, 8, 9) + fmt.Println(ret) - // Output: 1 2 3 4 5 6 7 8 9 - // 45 + // Output: 1 2 3 4 5 6 7 8 9 + // 45 + }) } -func Test_qsort(t *testing.T) { - library, err := getSystemLibrary() - if err != nil { - t.Errorf("couldn't get system library: %s", err) - } - libc, err := openLibrary(library) - if err != nil { - t.Errorf("failed to dlopen: %s", err) - } +func Benchmark_NewCallBack(b *testing.B) { + // Original + b.Run("RegisterFunc(original)", func(b *testing.B) { + // 1000000, 1111 ns/op, 328 B/op, 12 allocs/op + cb := purego.NewCallback(func(a1, a2, a3, a4, a5, a6, a7, a8, a9 int) int { + return a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + a9 + }) - data := []int{88, 56, 100, 2, 25} - sorted := []int{2, 25, 56, 88, 100} - compare := func(a, b *int) int { - return *a - *b - } - var qsort func(data []int, nitms uintptr, size uintptr, compar func(a, b *int) int) - purego.RegisterLibFunc(&qsort, libc, "qsort") - qsort(data, uintptr(len(data)), unsafe.Sizeof(int(0)), compare) - for i := range data { - if data[i] != sorted[i] { - t.Errorf("got %d wanted %d at %d", data[i], sorted[i], i) + var fn func(a1, a2, a3, a4, a5, a6, a7, a8, a9 int) int + purego.RegisterFunc(&fn, cb) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = fn(1, 2, 3, 4, 5, 6, 7, 8, 9) } - } + }) + // New + b.Run("RegisterFunc9_1(new)", func(b *testing.B) { + // 3153188, 383.6 ns/op, 144 B/op, 1 allocs/op + cb := purego.NewCallback(func(a1, a2, a3, a4, a5, a6, a7, a8, a9 int) int { + return a1 + a2 + a3 + a4 + a5 + a6 + a7 + a8 + a9 + }) + + var fn func(a1, a2, a3, a4, a5, a6, a7, a8, a9 int) int + purego.RegisterFunc9_1(&fn, cb) + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = fn(1, 2, 3, 4, 5, 6, 7, 8, 9) + } + }) } -// BenchmarkRegisterFuncQsort-16 558045 2064 ns/op 264 B/op 6 allocs/op -func BenchmarkRegisterFuncQsort(b *testing.B) { - library, err := getSystemLibrary() - if err != nil { - b.Errorf("couldn't get system library: %s", err) - } - libc, err := openLibrary(library) - if err != nil { - b.Errorf("failed to dlopen: %s", err) - } +// qsort - data := []int{88, 56, 100, 2, 25} - compare := func(a, b *int) int { - return *a - *b - } - var qsort func(data []int, nitms uintptr, size uintptr, compar func(a, b *int) int) - purego.RegisterLibFunc(&qsort, libc, "qsort") - b.ResetTimer() - for n := 0; n < b.N; n++ { +func Test_qsort(t *testing.T) { + // Original + t.Run("RegisterFunc(original)", func(t *testing.T) { + library, err := getSystemLibrary() + if err != nil { + t.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + t.Errorf("failed to dlopen: %s", err) + } + + data := []int{88, 56, 100, 2, 25} + sorted := []int{2, 25, 56, 88, 100} + compare := func(a, b *int) int { + return *a - *b + } + var qsort func(data []int, nitms uintptr, size uintptr, compar func(a, b *int) int) + purego.RegisterLibFunc(&qsort, libc, "qsort") qsort(data, uintptr(len(data)), unsafe.Sizeof(int(0)), compare) - } + for i := range data { + if data[i] != sorted[i] { + t.Errorf("got %d wanted %d at %d", data[i], sorted[i], i) + } + } + }) + // New + t.Run("RegisterFunc4_0(new)", func(t *testing.T) { + library, err := getSystemLibrary() + if err != nil { + t.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + t.Errorf("failed to dlopen: %s", err) + } + + data := []int{88, 56, 100, 2, 25} + sorted := []int{2, 25, 56, 88, 100} + compare := func(a, b *int) int { + return *a - *b + } + var qsort func(data []int, nitms uintptr, size uintptr, compar func(a, b *int) int) + symbol := purego.Symbol(libc, "qsort") + purego.RegisterFunc4_0(&qsort, symbol) + qsort(data, uintptr(len(data)), unsafe.Sizeof(int(0)), compare) + for i := range data { + if data[i] != sorted[i] { + t.Errorf("got %d wanted %d at %d", data[i], sorted[i], i) + } + } + }) } -// BenchmarkRegisterFuncStrlen-16 2411634 490.4 ns/op 120 B/op 6 allocs/op -func BenchmarkRegisterFuncStrlen(b *testing.B) { - library, err := getSystemLibrary() - if err != nil { - b.Errorf("couldn't get system library: %s", err) - } - libc, err := openLibrary(library) - if err != nil { - b.Errorf("failed to dlopen: %s", err) - } - var strlen func(string) int - purego.RegisterLibFunc(&strlen, libc, "strlen") - b.ResetTimer() - for n := 0; n < b.N; n++ { - strlen("abcdefghijklmnopqrstuvwxyz") - } +func Benchmark_qsort(b *testing.B) { + // Original + b.Run("RegisterFunc(original)", func(b *testing.B) { + // 558027, 2067 ns/op, 264 B/op, 6 allocs/op + library, err := getSystemLibrary() + if err != nil { + b.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + b.Errorf("failed to dlopen: %s", err) + } + + data := []int{88, 56, 100, 2, 25} + compare := func(a, b *int) int { + return *a - *b + } + var qsort func(data []int, nitms uintptr, size uintptr, compar func(a, b *int) int) + purego.RegisterLibFunc(&qsort, libc, "qsort") + b.ResetTimer() + for n := 0; n < b.N; n++ { + qsort(data, uintptr(len(data)), unsafe.Sizeof(int(0)), compare) + } + }) + // New + b.Run("RegisterFunc1_0(new)", func(b *testing.B) { + // 648578, 1806 ns/op, 296 B/op, 4 allocs/op + library, err := getSystemLibrary() + if err != nil { + b.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + b.Errorf("failed to dlopen: %s", err) + } + + data := []int{88, 56, 100, 2, 25} + compare := func(a, b *int) int { + return *a - *b + } + var qsort func(data []int, nitms uintptr, size uintptr, compar func(a, b *int) int) + symbol := purego.Symbol(libc, "qsort") + purego.RegisterFunc4_0(&qsort, symbol) + b.ResetTimer() + for n := 0; n < b.N; n++ { + qsort(data, uintptr(len(data)), unsafe.Sizeof(int(0)), compare) + } + }) } -// v2 +// puts -func Test2RegisterFuncPuts(t *testing.T) { - library, err := getSystemLibrary() - if err != nil { - t.Errorf("couldn't get system library: %s", err) - } - libc, err := openLibrary(library) - if err != nil { - t.Errorf("failed to dlopen: %s", err) - } - var puts func(string) - purego.RegisterLibFunc2(&puts, libc, "puts") - puts("Calling C from from Go without Cgo! 2") - puts("Calling C from from Go without Cgo! 3") - puts("Calling C from from Go without Cgo! 4") +func Test_puts(t *testing.T) { + // Original + t.Run("RegisterFunc(original)", func(t *testing.T) { + library, err := getSystemLibrary() + if err != nil { + t.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + t.Errorf("failed to dlopen: %s", err) + } + var puts func(string) + purego.RegisterLibFunc(&puts, libc, "puts") + puts("Calling C from from Go without Cgo! (original)") + }) + // New + t.Run("RegisterFunc1_0(new)", func(t *testing.T) { + library, err := getSystemLibrary() + if err != nil { + t.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + t.Errorf("failed to dlopen: %s", err) + } + var puts func(string) + symbol := purego.Symbol(libc, "puts") + purego.RegisterFunc1_0(&puts, symbol) + puts("Calling C from from Go without Cgo! (new)") + }) } +// strlen + func Test2_strlen(t *testing.T) { library, err := getSystemLibrary() if err != nil { @@ -156,7 +253,8 @@ func Test2_strlen(t *testing.T) { t.Errorf("failed to dlopen: %s", err) } var strlen func(string) int - purego.RegisterLibFunc2(&strlen, libc, "strlen") + symbol := purego.Symbol(libc, "strlen") + purego.RegisterFunc1_1(&strlen, symbol) count := strlen("abcdefghijklmnopqrstuvwxyz") if count != 26 { t.Errorf("strlen(0): expected 26 but got %d", count) @@ -167,71 +265,63 @@ func Test2_strlen(t *testing.T) { } } -func Test2_qsort(t *testing.T) { - library, err := getSystemLibrary() - if err != nil { - t.Errorf("couldn't get system library: %s", err) - } - libc, err := openLibrary(library) - if err != nil { - t.Errorf("failed to dlopen: %s", err) - } - - data := []int{88, 56, 100, 2, 25} - sorted := []int{2, 25, 56, 88, 100} - compare := func(a, b *int) int { - return *a - *b - } - var qsort func(data []int, nitms uintptr, size uintptr, compar func(a, b *int) int) - purego.RegisterLibFunc2(&qsort, libc, "qsort") - qsort(data, uintptr(len(data)), unsafe.Sizeof(int(0)), compare) - qsort(data, uintptr(len(data)), unsafe.Sizeof(int(0)), compare) - qsort(data, uintptr(len(data)), unsafe.Sizeof(int(0)), compare) - qsort(data, uintptr(len(data)), unsafe.Sizeof(int(0)), compare) - for i := range data { - if data[i] != sorted[i] { - t.Errorf("got %d wanted %d at %d", data[i], sorted[i], i) +func Benchmark_strlen(b *testing.B) { + // Current + b.Run("RegisterFunc(original)", func(b *testing.B) { + // 2411634 - 490.4 ns/op - 120 B/op - 6 allocs/op + library, err := getSystemLibrary() + if err != nil { + b.Errorf("couldn't get system library: %s", err) } - } -} - -// Benchmark2RegisterFuncQsort-16 558032 2057 ns/op 264 B/op 6 allocs/op -func Benchmark2RegisterFuncQsort(b *testing.B) { - library, err := getSystemLibrary() - if err != nil { - b.Errorf("couldn't get system library: %s", err) - } - libc, err := openLibrary(library) - if err != nil { - b.Errorf("failed to dlopen: %s", err) - } - - data := []int{88, 56, 100, 2, 25} - compare := func(a, b *int) int { - return *a - *b - } - var qsort func(data []int, nitms uintptr, size uintptr, compar func(a, b *int) int) - purego.RegisterLibFunc2(&qsort, libc, "qsort") - b.ResetTimer() - for n := 0; n < b.N; n++ { - qsort(data, uintptr(len(data)), unsafe.Sizeof(int(0)), compare) - } -} - -// Benchmark2RegisterFuncStrlen-16 2502175 461.1 ns/op 190 B/op 5 allocs/op -func Benchmark2RegisterFuncStrlen(b *testing.B) { - library, err := getSystemLibrary() - if err != nil { - b.Errorf("couldn't get system library: %s", err) - } - libc, err := openLibrary(library) - if err != nil { - b.Errorf("failed to dlopen: %s", err) - } - var strlen func(string) int - purego.RegisterLibFunc2(&strlen, libc, "strlen") - b.ResetTimer() - for n := 0; n < b.N; n++ { - strlen("abcdefghijklmnopqrstuvwxyz") - } + libc, err := openLibrary(library) + if err != nil { + b.Errorf("failed to dlopen: %s", err) + } + var strlen func(string) int + purego.RegisterLibFunc(&strlen, libc, "strlen") + b.ResetTimer() + for n := 0; n < b.N; n++ { + strlen("abcdefghijklmnopqrstuvwxyz") + } + }) + // New + b.Run("RegisterFunc1_1(new)", func(b *testing.B) { + // 7690965 - 157.0 ns/op - 176 B/op - 2 allocs/op + library, err := getSystemLibrary() + if err != nil { + b.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + b.Errorf("failed to dlopen: %s", err) + } + var strlen func(string) int + symbol := purego.Symbol(libc, "strlen") + purego.RegisterFunc1_1(&strlen, symbol) + b.ResetTimer() + for n := 0; n < b.N; n++ { + strlen("abcdefghijklmnopqrstuvwxyz") + } + }) + // Direct + b.Run("Syscall9", func(b *testing.B) { + // 11762629 - 100.5 ns/op - 32 B/op - 1 allocs/op + library, err := getSystemLibrary() + if err != nil { + b.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + b.Errorf("failed to dlopen: %s", err) + } + symbol := purego.Symbol(libc, "strlen") + b.ResetTimer() + for n := 0; n < b.N; n++ { + ptr := strings.CString("abcdefghijklmnopqrstuvwxyz") + sysargs := [9]uintptr{ + uintptr(unsafe.Pointer(ptr)), + } + _, _ = purego.Syscall9(symbol, sysargs) + } + }) } From fdab9e906932161e6070e81af0cd27eaee8b03c1 Mon Sep 17 00:00:00 2001 From: Zyko Date: Sun, 8 Oct 2023 13:37:12 +0200 Subject: [PATCH 03/12] renamed reg to ss for syscallstack --- func.go | 100 ++++++++++++++++++++++++++++---------------------------- 1 file changed, 50 insertions(+), 50 deletions(-) diff --git a/func.go b/func.go index 76035095..e88549b4 100644 --- a/func.go +++ b/func.go @@ -634,11 +634,11 @@ func RegisterFunc1_0[I0 any](fptr *func(I0), cfn uintptr) { // Create new function *fptr = func(i0 I0) { // Create new syscall stack - reg := newSyscallStack() + ss := newSyscallStack() // Add inputs in registers - func0(reg, i0) + func0(ss, i0) // Function call - runtime_call(reg, cfn) + runtime_call(ss, cfn) } } @@ -651,12 +651,12 @@ func RegisterFunc2_0[I0, I1 any](fptr *func(I0, I1), cfn uintptr) { // Create new function *fptr = func(i0 I0, i1 I1) { // Create new syscall stack - reg := newSyscallStack() + ss := newSyscallStack() // Add inputs in registers - func0(reg, i0) - func1(reg, i1) + func0(ss, i0) + func1(ss, i1) // Function call - runtime_call(reg, cfn) + runtime_call(ss, cfn) } } @@ -670,13 +670,13 @@ func RegisterFunc3_0[I0, I1, I2 any](fptr *func(I0, I1, I2), cfn uintptr) { // Create new function *fptr = func(i0 I0, i1 I1, i2 I2) { // Create new syscall stack - reg := newSyscallStack() + ss := newSyscallStack() // Add inputs in registers - func0(reg, i0) - func1(reg, i1) - func2(reg, i2) + func0(ss, i0) + func1(ss, i1) + func2(ss, i2) // Function call - runtime_call(reg, cfn) + runtime_call(ss, cfn) } } @@ -691,14 +691,14 @@ func RegisterFunc4_0[I0, I1, I2, I3 any](fptr *func(I0, I1, I2, I3), cfn uintptr // Create new function *fptr = func(i0 I0, i1 I1, i2 I2, i3 I3) { // Create new syscall stack - reg := newSyscallStack() + ss := newSyscallStack() // Add inputs in registers - func0(reg, i0) - func1(reg, i1) - func2(reg, i2) - func3(reg, i3) + func0(ss, i0) + func1(ss, i1) + func2(ss, i2) + func3(ss, i3) // Function call - runtime_call(reg, cfn) + runtime_call(ss, cfn) } } @@ -714,15 +714,15 @@ func RegisterFunc5_0[I0, I1, I2, I3, I4 any](fptr *func(I0, I1, I2, I3, I4), cfn // Create new function *fptr = func(i0 I0, i1 I1, i2 I2, i3 I3, i4 I4) { // Create new syscall stack - reg := newSyscallStack() + ss := newSyscallStack() // Add inputs in registers - func0(reg, i0) - func1(reg, i1) - func2(reg, i2) - func3(reg, i3) - func4(reg, i4) + func0(ss, i0) + func1(ss, i1) + func2(ss, i2) + func3(ss, i3) + func4(ss, i4) // Function call - runtime_call(reg, cfn) + runtime_call(ss, cfn) } } @@ -739,11 +739,11 @@ func RegisterFunc1_1[I0, O any](fptr *func(I0) O, cfn uintptr) { // Create new function *fptr = func(i0 I0) (out O) { // Create new syscall stack - reg := newSyscallStack() + ss := newSyscallStack() // Add inputs in registers - func0(reg, i0) + func0(ss, i0) // Function call - r1, r2 := runtime_call(reg, cfn) + r1, r2 := runtime_call(ss, cfn) return returnFunc(r1, r2) } @@ -759,12 +759,12 @@ func RegisterFunc2_1[I0, I1, O any](fptr *func(I0, I1) O, cfn uintptr) { // Create new function *fptr = func(i0 I0, i1 I1) (out O) { // Create new syscall stack - reg := newSyscallStack() + ss := newSyscallStack() // Add inputs in registers - func0(reg, i0) - func1(reg, i1) + func0(ss, i0) + func1(ss, i1) // Function call - r1, r2 := runtime_call(reg, cfn) + r1, r2 := runtime_call(ss, cfn) return returnFunc(r1, r2) } @@ -781,13 +781,13 @@ func RegisterFunc3_1[I0, I1, I2, O any](fptr *func(I0, I1, I2) O, cfn uintptr) { // Create new function *fptr = func(i0 I0, i1 I1, i2 I2) (out O) { // Create new syscall stack - reg := newSyscallStack() + ss := newSyscallStack() // Add inputs in registers - func0(reg, i0) - func1(reg, i1) - func2(reg, i2) + func0(ss, i0) + func1(ss, i1) + func2(ss, i2) // Function call - r1, r2 := runtime_call(reg, cfn) + r1, r2 := runtime_call(ss, cfn) return returnFunc(r1, r2) } @@ -805,14 +805,14 @@ func RegisterFunc4_1[I0, I1, I2, I3, O any](fptr *func(I0, I1, I2, I3) O, cfn ui // Create new function *fptr = func(i0 I0, i1 I1, i2 I2, i3 I3) (out O) { // Create new syscall stack - reg := newSyscallStack() + ss := newSyscallStack() // Add inputs in registers - func0(reg, i0) - func1(reg, i1) - func2(reg, i2) - func3(reg, i3) + func0(ss, i0) + func1(ss, i1) + func2(ss, i2) + func3(ss, i3) // Function call - r1, r2 := runtime_call(reg, cfn) + r1, r2 := runtime_call(ss, cfn) return returnFunc(r1, r2) } @@ -831,15 +831,15 @@ func RegisterFunc5_1[I0, I1, I2, I3, I4, O any](fptr *func(I0, I1, I2, I3, I4) O // Create new function *fptr = func(i0 I0, i1 I1, i2 I2, i3 I3, i4 I4) (out O) { // Create new syscall stack - reg := newSyscallStack() + ss := newSyscallStack() // Add inputs in registers - func0(reg, i0) - func1(reg, i1) - func2(reg, i2) - func3(reg, i3) - func4(reg, i4) + func0(ss, i0) + func1(ss, i1) + func2(ss, i2) + func3(ss, i3) + func4(ss, i4) // Function call - r1, r2 := runtime_call(reg, cfn) + r1, r2 := runtime_call(ss, cfn) return returnFunc(r1, r2) } From 5f8e140a4c86c7a310d4579911ea6b3e53090a74 Mon Sep 17 00:00:00 2001 From: Zyko Date: Sun, 8 Oct 2023 15:05:49 +0200 Subject: [PATCH 04/12] Add cos to test floats --- func_test.go | 140 +++++++++++++++++++++++++++++++++++++++++++-------- 1 file changed, 120 insertions(+), 20 deletions(-) diff --git a/func_test.go b/func_test.go index 3c35729f..2eeab7c5 100644 --- a/func_test.go +++ b/func_test.go @@ -5,6 +5,7 @@ package purego_test import ( "fmt" + "math" "runtime" "testing" "unsafe" @@ -243,26 +244,28 @@ func Test_puts(t *testing.T) { // strlen -func Test2_strlen(t *testing.T) { - library, err := getSystemLibrary() - if err != nil { - t.Errorf("couldn't get system library: %s", err) - } - libc, err := openLibrary(library) - if err != nil { - t.Errorf("failed to dlopen: %s", err) - } - var strlen func(string) int - symbol := purego.Symbol(libc, "strlen") - purego.RegisterFunc1_1(&strlen, symbol) - count := strlen("abcdefghijklmnopqrstuvwxyz") - if count != 26 { - t.Errorf("strlen(0): expected 26 but got %d", count) - } - count = strlen("abcdefghijklmnopqrstuvwxyz") - if count != 26 { - t.Errorf("strlen(1): expected 26 but got %d", count) - } +func Test_strlen(t *testing.T) { + t.Run("RegisterFunc(original)", func(t *testing.T) { + library, err := getSystemLibrary() + if err != nil { + t.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + t.Errorf("failed to dlopen: %s", err) + } + var strlen func(string) int + symbol := purego.Symbol(libc, "strlen") + purego.RegisterFunc1_1(&strlen, symbol) + count := strlen("abcdefghijklmnopqrstuvwxyz") + if count != 26 { + t.Errorf("strlen(0): expected 26 but got %d", count) + } + count = strlen("abcdefghijklmnopqrstuvwxyz") + if count != 26 { + t.Errorf("strlen(1): expected 26 but got %d", count) + } + }) } func Benchmark_strlen(b *testing.B) { @@ -325,3 +328,100 @@ func Benchmark_strlen(b *testing.B) { } }) } + +// cos + +func Test_cos(t *testing.T) { + // Original + t.Run("RegisterFunc(original)", func(t *testing.T) { + library, err := getSystemLibrary() + if err != nil { + t.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + t.Errorf("failed to dlopen: %s", err) + } + var cos func(float64) float64 + purego.RegisterLibFunc(&cos, libc, "cos") + // 0.05428962282295477 + const v = 1.51648 + expected := math.Cos(v) + actual := cos(v) + if expected != actual { + t.Errorf("cos(%.8f): expected %.8f but got %.8f", v, expected, actual) + } + }) + // New + t.Run("RegisterFunc1_1(new)", func(t *testing.T) { + library, err := getSystemLibrary() + if err != nil { + t.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + t.Errorf("failed to dlopen: %s", err) + } + var cos func(float64) float64 + symbol := purego.Symbol(libc, "cos") + purego.RegisterFunc1_1(&cos, symbol) + // 0.05428962282295477 + const v = 1.51648 + expected := math.Cos(v) + actual := cos(v) + if expected != actual { + t.Errorf("cos(%.8f): expected %.8f but got %.8f", v, expected, actual) + } + }) +} + +func Benchmark_cos(b *testing.B) { + // Original + b.Run("RegisterFunc(original)", func(b *testing.B) { + // 3337392, 362.0 ns/op, 64 B/op, 4 allocs/op + library, err := getSystemLibrary() + if err != nil { + b.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + b.Errorf("failed to dlopen: %s", err) + } + var cos func(float64) float64 + purego.RegisterLibFunc(&cos, libc, "cos") + // 0.05428962282295477 + const v = 1.51648 + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = cos(v) + } + }) + // New + b.Run("RegisterFunc1_1(new)", func(b *testing.B) { + // 9300645, 129.0 ns/op, 144 B/op, 1 allocs/op + library, err := getSystemLibrary() + if err != nil { + b.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + b.Errorf("failed to dlopen: %s", err) + } + var cos func(float64) float64 + symbol := purego.Symbol(libc, "cos") + purego.RegisterFunc1_1(&cos, symbol) + // 0.05428962282295477 + const v = 1.51648 + b.ResetTimer() + for i := 0; i < b.N; i++ { + _ = cos(v) + } + }) + // Go + b.Run("Go", func(b *testing.B) { + const v = 1.51648 + for i := 0; i < b.N; i++ { + _ = math.Cos(v) + } + }) +} From e3d1ca6ff134d8f8cac773c2ba38c087e42c1247 Mon Sep 17 00:00:00 2001 From: Zyko Date: Sun, 8 Oct 2023 15:15:12 +0200 Subject: [PATCH 05/12] Added isupper to test bool --- func_test.go | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 86 insertions(+) diff --git a/func_test.go b/func_test.go index 2eeab7c5..989562ad 100644 --- a/func_test.go +++ b/func_test.go @@ -425,3 +425,89 @@ func Benchmark_cos(b *testing.B) { } }) } + +// isupper + +func Test_isupper(t *testing.T) { + // Original + t.Run("RegisterFunc(original)", func(t *testing.T) { + library, err := getSystemLibrary() + if err != nil { + t.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + t.Errorf("failed to dlopen: %s", err) + } + var isupper func(c rune) bool + purego.RegisterLibFunc(&isupper, libc, "isupper") + actual := isupper('A') + if !actual { + t.Errorf("isupper('%c'): expected true but got false", 'A') + } + actual = isupper('a') + if actual { + t.Errorf("isupper('%c'): expected false but got true", 'a') + } + }) + // New + t.Run("RegisterFunc1_1(new)", func(t *testing.T) { + library, err := getSystemLibrary() + if err != nil { + t.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + t.Errorf("failed to dlopen: %s", err) + } + var isupper func(c rune) bool + symbol := purego.Symbol(libc, "isupper") + purego.RegisterFunc(&isupper, symbol) + actual := isupper('A') + if !actual { + t.Errorf("isupper('%c'): expected true but got false", 'A') + } + actual = isupper('a') + if actual { + t.Errorf("isupper('%c'): expected false but got true", 'a') + } + }) +} + +func Benchmark_isupper(b *testing.B) { + // Original + b.Run("RegisterFunc(original)", func(b *testing.B) { + // 3037436, 395.6 ns/op, 56 B/op, 4 allocs/op + library, err := getSystemLibrary() + if err != nil { + b.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + b.Errorf("failed to dlopen: %s", err) + } + var isupper func(c rune) bool + purego.RegisterLibFunc(&isupper, libc, "isupper") + for i := 0; i < b.N; i++ { + _ = isupper('A') + } + }) + // New + b.Run("RegisterFunc1_1(new)", func(b *testing.B) { + // 10082194, 121.2 ns/op, 144 B/op, 1 allocs/op + library, err := getSystemLibrary() + if err != nil { + b.Errorf("couldn't get system library: %s", err) + } + libc, err := openLibrary(library) + if err != nil { + b.Errorf("failed to dlopen: %s", err) + } + var isupper func(c rune) bool + symbol := purego.Symbol(libc, "isupper") + purego.RegisterFunc1_1(&isupper, symbol) + for i := 0; i < b.N; i++ { + _ = isupper('A') + } + }) +} From 3d90cb88f9bf8d0b4731733b948cb4c320953c84 Mon Sep 17 00:00:00 2001 From: Zyko Date: Sun, 8 Oct 2023 15:36:19 +0200 Subject: [PATCH 06/12] Restore RegisterFunc's keepalive the way it was --- func.go | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/func.go b/func.go index e88549b4..11c25e2b 100644 --- a/func.go +++ b/func.go @@ -206,11 +206,11 @@ func RegisterFunc(fptr interface{}, cfn uintptr) { addFloat = addStack } - //var keepAlive []interface{} - /*defer func() { + var keepAlive []interface{} + defer func() { runtime.KeepAlive(keepAlive) runtime.KeepAlive(args) - }()*/ + }() for _, v := range args { switch v.Kind() { case reflect.String: From 9c6220c62e46d93d488196e646136a16de3511e1 Mon Sep 17 00:00:00 2001 From: Zyko Date: Sun, 8 Oct 2023 15:37:00 +0200 Subject: [PATCH 07/12] missed one --- func.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/func.go b/func.go index 11c25e2b..97edc588 100644 --- a/func.go +++ b/func.go @@ -215,7 +215,7 @@ func RegisterFunc(fptr interface{}, cfn uintptr) { switch v.Kind() { case reflect.String: ptr := strings.CString(v.String()) - //keepAlive = append(keepAlive, ptr) + keepAlive = append(keepAlive, ptr) addInt(uintptr(unsafe.Pointer(ptr))) case reflect.Uintptr, reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: addInt(uintptr(v.Uint())) From 353963f744039012914d33413ab2c9541de90dcb Mon Sep 17 00:00:00 2001 From: Zyko Date: Sun, 8 Oct 2023 15:39:12 +0200 Subject: [PATCH 08/12] fixed naming --- func_test.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/func_test.go b/func_test.go index 989562ad..79f084fb 100644 --- a/func_test.go +++ b/func_test.go @@ -245,7 +245,7 @@ func Test_puts(t *testing.T) { // strlen func Test_strlen(t *testing.T) { - t.Run("RegisterFunc(original)", func(t *testing.T) { + t.Run("RegisterFunc1_1(new)", func(t *testing.T) { library, err := getSystemLibrary() if err != nil { t.Errorf("couldn't get system library: %s", err) From 382bfa8966b100131f8bf5b8394fc0cdd6def0bd Mon Sep 17 00:00:00 2001 From: Zyko Date: Sun, 8 Oct 2023 15:53:49 +0200 Subject: [PATCH 09/12] Fixed bitwise operations --- func.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/func.go b/func.go index 97edc588..0047927c 100644 --- a/func.go +++ b/func.go @@ -320,7 +320,7 @@ func (ss *syscallStackArm64NoWin) addInt(x uintptr) { ss.addStack(x) } else { ss[1+n] = x - ss[0] = ss[0] - n | ((n + 1) << 4) + ss[0] = (ss[0] - (n << 4)) | ((n + 1) << 4) } } @@ -328,7 +328,7 @@ func (ss *syscallStackArm64NoWin) addFloat(x uintptr) { n := ss.numFloats() if int(n) < numOfFloats { ss[1+maxArgs+n] = x - ss[0] = ss[0] - n | ((n + 1) << 8) + ss[0] = (ss[0] - (n << 8)) | ((n + 1) << 8) } else { ss.addStack(x) } From df7a889b43f4e0c5c6d4b451cef5acc61ac8e415 Mon Sep 17 00:00:00 2001 From: Zyko Date: Sun, 8 Oct 2023 16:06:24 +0200 Subject: [PATCH 10/12] Removed useless syscall9 since SyscallN already exists --- func.go | 18 ++++++------------ func_test.go | 6 +++--- 2 files changed, 9 insertions(+), 15 deletions(-) diff --git a/func.go b/func.go index 0047927c..cdf71d7c 100644 --- a/func.go +++ b/func.go @@ -596,12 +596,6 @@ func argsCheck(ty reflect.Type, cfn uintptr) { } } -// Note: Can be removed, just to benchmark direct syscalls to compare, should not be exposed -func Syscall9(cfn uintptr, sysargs [maxArgs]uintptr) (uintptr, uintptr) { - r1, r2, _ := syscall_syscall9X(cfn, sysargs[0], sysargs[1], sysargs[2], sysargs[3], sysargs[4], sysargs[5], sysargs[6], sysargs[7], sysargs[8]) - return r1, r2 -} - // Convenience to avoid code repetition in all instances of RegisterFuncI_O func runtime_call(reg syscallStack, cfn uintptr) (uintptr, uintptr) { var r1, r2 uintptr @@ -737,7 +731,7 @@ func RegisterFunc1_1[I0, O any](fptr *func(I0) O, cfn uintptr) { returnFunc := getReturnFunc[O]() func0 := getAddFunc[I0]() // Create new function - *fptr = func(i0 I0) (out O) { + *fptr = func(i0 I0) O { // Create new syscall stack ss := newSyscallStack() // Add inputs in registers @@ -757,7 +751,7 @@ func RegisterFunc2_1[I0, I1, O any](fptr *func(I0, I1) O, cfn uintptr) { func0 := getAddFunc[I0]() func1 := getAddFunc[I1]() // Create new function - *fptr = func(i0 I0, i1 I1) (out O) { + *fptr = func(i0 I0, i1 I1) O { // Create new syscall stack ss := newSyscallStack() // Add inputs in registers @@ -779,7 +773,7 @@ func RegisterFunc3_1[I0, I1, I2, O any](fptr *func(I0, I1, I2) O, cfn uintptr) { func1 := getAddFunc[I1]() func2 := getAddFunc[I2]() // Create new function - *fptr = func(i0 I0, i1 I1, i2 I2) (out O) { + *fptr = func(i0 I0, i1 I1, i2 I2) O { // Create new syscall stack ss := newSyscallStack() // Add inputs in registers @@ -803,7 +797,7 @@ func RegisterFunc4_1[I0, I1, I2, I3, O any](fptr *func(I0, I1, I2, I3) O, cfn ui func2 := getAddFunc[I2]() func3 := getAddFunc[I3]() // Create new function - *fptr = func(i0 I0, i1 I1, i2 I2, i3 I3) (out O) { + *fptr = func(i0 I0, i1 I1, i2 I2, i3 I3) O { // Create new syscall stack ss := newSyscallStack() // Add inputs in registers @@ -829,7 +823,7 @@ func RegisterFunc5_1[I0, I1, I2, I3, I4, O any](fptr *func(I0, I1, I2, I3, I4) O func3 := getAddFunc[I3]() func4 := getAddFunc[I4]() // Create new function - *fptr = func(i0 I0, i1 I1, i2 I2, i3 I3, i4 I4) (out O) { + *fptr = func(i0 I0, i1 I1, i2 I2, i3 I3, i4 I4) O { // Create new syscall stack ss := newSyscallStack() // Add inputs in registers @@ -862,7 +856,7 @@ func RegisterFunc9_1[I0, I1, I2, I3, I4, I5, I6, I7, I8, O any](fptr *func(I0, I func7 := getAddFunc[I7]() func8 := getAddFunc[I8]() // Create new function - *fptr = func(i0 I0, i1 I1, i2 I2, i3 I3, i4 I4, i5 I5, i6 I6, i7 I7, i8 I8) (out O) { + *fptr = func(i0 I0, i1 I1, i2 I2, i3 I3, i4 I4, i5 I5, i6 I6, i7 I7, i8 I8) O { // Create new syscall stack reg := newSyscallStack() // Add inputs in registers diff --git a/func_test.go b/func_test.go index 79f084fb..091afb38 100644 --- a/func_test.go +++ b/func_test.go @@ -307,8 +307,8 @@ func Benchmark_strlen(b *testing.B) { } }) // Direct - b.Run("Syscall9", func(b *testing.B) { - // 11762629 - 100.5 ns/op - 32 B/op - 1 allocs/op + b.Run("SyscallN", func(b *testing.B) { + // 8449221, 142.2 ns/op, 112 B/op, 2 allocs/op library, err := getSystemLibrary() if err != nil { b.Errorf("couldn't get system library: %s", err) @@ -324,7 +324,7 @@ func Benchmark_strlen(b *testing.B) { sysargs := [9]uintptr{ uintptr(unsafe.Pointer(ptr)), } - _, _ = purego.Syscall9(symbol, sysargs) + _, _, _ = purego.SyscallN(symbol, sysargs[:]...) } }) } From b101fcd60051efc8cd85adcecf140aa299d5b47e Mon Sep 17 00:00:00 2001 From: Zyko Date: Sun, 8 Oct 2023 16:21:51 +0200 Subject: [PATCH 11/12] few renames --- func.go | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) diff --git a/func.go b/func.go index cdf71d7c..dcd8c312 100644 --- a/func.go +++ b/func.go @@ -597,9 +597,9 @@ func argsCheck(ty reflect.Type, cfn uintptr) { } // Convenience to avoid code repetition in all instances of RegisterFuncI_O -func runtime_call(reg syscallStack, cfn uintptr) (uintptr, uintptr) { +func runtime_call(ss syscallStack, cfn uintptr) (uintptr, uintptr) { var r1, r2 uintptr - sysargs, floats := reg.SysArgs(), reg.Floats() + sysargs, floats := ss.SysArgs(), ss.Floats() if runtime.GOARCH == "arm64" || runtime.GOOS != "windows" { // Use the normal arm64 calling convention even on Windows syscall := syscall9Args{ @@ -858,19 +858,19 @@ func RegisterFunc9_1[I0, I1, I2, I3, I4, I5, I6, I7, I8, O any](fptr *func(I0, I // Create new function *fptr = func(i0 I0, i1 I1, i2 I2, i3 I3, i4 I4, i5 I5, i6 I6, i7 I7, i8 I8) O { // Create new syscall stack - reg := newSyscallStack() + ss := newSyscallStack() // Add inputs in registers - func0(reg, i0) - func1(reg, i1) - func2(reg, i2) - func3(reg, i3) - func4(reg, i4) - func5(reg, i5) - func6(reg, i6) - func7(reg, i7) - func8(reg, i8) + func0(ss, i0) + func1(ss, i1) + func2(ss, i2) + func3(ss, i3) + func4(ss, i4) + func5(ss, i5) + func6(ss, i6) + func7(ss, i7) + func8(ss, i8) // Function call - r1, r2 := runtime_call(reg, cfn) + r1, r2 := runtime_call(ss, cfn) return returnFunc(r1, r2) } From 0b024757f5b6e755c123b915c1706541efaaa066 Mon Sep 17 00:00:00 2001 From: Zyko Date: Sun, 8 Oct 2023 17:59:49 +0200 Subject: [PATCH 12/12] Reorganize new code additions + simplify register func first block --- func.go | 87 +++++++++++++++++++++++----------------------------- func_test.go | 2 +- 2 files changed, 40 insertions(+), 49 deletions(-) diff --git a/func.go b/func.go index dcd8c312..194744ed 100644 --- a/func.go +++ b/func.go @@ -292,8 +292,33 @@ func RegisterFunc(fptr interface{}, cfn uintptr) { fn.Set(v) } +func numOfIntegerRegisters() int { + switch runtime.GOARCH { + case "arm64": + return 8 + case "amd64": + return 6 + // TODO: figure out why 386 tests are not working + /*case "386": + return 0 + case "arm": + return 4*/ + default: + panic("purego: unknown GOARCH (" + runtime.GOARCH + ")") + } +} + // WIP: Less reflection below +type syscallStack interface { + SysArgs() []uintptr + Floats() []uintptr + + addStack(x uintptr) + addInt(x uintptr) + addFloat(x uintptr) +} + type syscallStackArm64NoWin [1 + maxArgs + numOfFloats]uintptr func (ss *syscallStackArm64NoWin) numStack() uintptr { @@ -378,15 +403,6 @@ func (ss *syscallStackAmd64OrWin) Floats() []uintptr { return ss[1+maxArgs:] } -type syscallStack interface { - SysArgs() []uintptr - Floats() []uintptr - - addStack(x uintptr) - addInt(x uintptr) - addFloat(x uintptr) -} - func newSyscallStack() syscallStack { if runtime.GOARCH == "arm64" || runtime.GOOS != "windows" { return &syscallStackArm64NoWin{} @@ -562,13 +578,15 @@ func getReturnFunc[T any]() func(r1, r2 uintptr) T { return nil } -func argsCheck(ty reflect.Type, cfn uintptr) { +func argsCheck(fptr any, cfn uintptr) { if cfn == 0 { panic("purego: cfn is nil") } // this code checks how many registers and stack this function will use // to avoid crashing with too many arguments var ints, floats, stack int + + ty := reflect.ValueOf(fptr).Elem().Type() for i := 0; i < ty.NumIn(); i++ { arg := ty.In(i) switch arg.Kind() { @@ -621,9 +639,8 @@ func runtime_call(ss syscallStack, cfn uintptr) (uintptr, uintptr) { // No return value func RegisterFunc1_0[I0 any](fptr *func(I0), cfn uintptr) { - ty := reflect.ValueOf(fptr).Elem().Type() // Prevent too many registers and check func address is okay - argsCheck(ty, cfn) + argsCheck(fptr, cfn) func0 := getAddFunc[I0]() // Create new function *fptr = func(i0 I0) { @@ -637,9 +654,8 @@ func RegisterFunc1_0[I0 any](fptr *func(I0), cfn uintptr) { } func RegisterFunc2_0[I0, I1 any](fptr *func(I0, I1), cfn uintptr) { - ty := reflect.ValueOf(fptr).Elem().Type() // Prevent too many registers and check func address is okay - argsCheck(ty, cfn) + argsCheck(fptr, cfn) func0 := getAddFunc[I0]() func1 := getAddFunc[I1]() // Create new function @@ -655,9 +671,8 @@ func RegisterFunc2_0[I0, I1 any](fptr *func(I0, I1), cfn uintptr) { } func RegisterFunc3_0[I0, I1, I2 any](fptr *func(I0, I1, I2), cfn uintptr) { - ty := reflect.ValueOf(fptr).Elem().Type() // Prevent too many registers and check func address is okay - argsCheck(ty, cfn) + argsCheck(fptr, cfn) func0 := getAddFunc[I0]() func1 := getAddFunc[I1]() func2 := getAddFunc[I2]() @@ -675,9 +690,8 @@ func RegisterFunc3_0[I0, I1, I2 any](fptr *func(I0, I1, I2), cfn uintptr) { } func RegisterFunc4_0[I0, I1, I2, I3 any](fptr *func(I0, I1, I2, I3), cfn uintptr) { - ty := reflect.ValueOf(fptr).Elem().Type() // Prevent too many registers and check func address is okay - argsCheck(ty, cfn) + argsCheck(fptr, cfn) func0 := getAddFunc[I0]() func1 := getAddFunc[I1]() func2 := getAddFunc[I2]() @@ -697,9 +711,8 @@ func RegisterFunc4_0[I0, I1, I2, I3 any](fptr *func(I0, I1, I2, I3), cfn uintptr } func RegisterFunc5_0[I0, I1, I2, I3, I4 any](fptr *func(I0, I1, I2, I3, I4), cfn uintptr) { - ty := reflect.ValueOf(fptr).Elem().Type() // Prevent too many registers and check func address is okay - argsCheck(ty, cfn) + argsCheck(fptr, cfn) func0 := getAddFunc[I0]() func1 := getAddFunc[I1]() func2 := getAddFunc[I2]() @@ -725,9 +738,8 @@ func RegisterFunc5_0[I0, I1, I2, I3, I4 any](fptr *func(I0, I1, I2, I3, I4), cfn // 1 return value func RegisterFunc1_1[I0, O any](fptr *func(I0) O, cfn uintptr) { - ty := reflect.ValueOf(fptr).Elem().Type() // Prevent too many registers and check func address is okay - argsCheck(ty, cfn) + argsCheck(fptr, cfn) returnFunc := getReturnFunc[O]() func0 := getAddFunc[I0]() // Create new function @@ -744,9 +756,8 @@ func RegisterFunc1_1[I0, O any](fptr *func(I0) O, cfn uintptr) { } func RegisterFunc2_1[I0, I1, O any](fptr *func(I0, I1) O, cfn uintptr) { - ty := reflect.ValueOf(fptr).Elem().Type() // Prevent too many registers and check func address is okay - argsCheck(ty, cfn) + argsCheck(fptr, cfn) returnFunc := getReturnFunc[O]() func0 := getAddFunc[I0]() func1 := getAddFunc[I1]() @@ -765,9 +776,8 @@ func RegisterFunc2_1[I0, I1, O any](fptr *func(I0, I1) O, cfn uintptr) { } func RegisterFunc3_1[I0, I1, I2, O any](fptr *func(I0, I1, I2) O, cfn uintptr) { - ty := reflect.ValueOf(fptr).Elem().Type() // Prevent too many registers and check func address is okay - argsCheck(ty, cfn) + argsCheck(fptr, cfn) returnFunc := getReturnFunc[O]() func0 := getAddFunc[I0]() func1 := getAddFunc[I1]() @@ -788,9 +798,8 @@ func RegisterFunc3_1[I0, I1, I2, O any](fptr *func(I0, I1, I2) O, cfn uintptr) { } func RegisterFunc4_1[I0, I1, I2, I3, O any](fptr *func(I0, I1, I2, I3) O, cfn uintptr) { - ty := reflect.ValueOf(fptr).Elem().Type() // Prevent too many registers and check func address is okay - argsCheck(ty, cfn) + argsCheck(fptr, cfn) returnFunc := getReturnFunc[O]() func0 := getAddFunc[I0]() func1 := getAddFunc[I1]() @@ -813,9 +822,8 @@ func RegisterFunc4_1[I0, I1, I2, I3, O any](fptr *func(I0, I1, I2, I3) O, cfn ui } func RegisterFunc5_1[I0, I1, I2, I3, I4, O any](fptr *func(I0, I1, I2, I3, I4) O, cfn uintptr) { - ty := reflect.ValueOf(fptr).Elem().Type() // Prevent too many registers and check func address is okay - argsCheck(ty, cfn) + argsCheck(fptr, cfn) returnFunc := getReturnFunc[O]() func0 := getAddFunc[I0]() func1 := getAddFunc[I1]() @@ -842,9 +850,8 @@ func RegisterFunc5_1[I0, I1, I2, I3, I4, O any](fptr *func(I0, I1, I2, I3, I4) O // TODO: missing 6-8 func RegisterFunc9_1[I0, I1, I2, I3, I4, I5, I6, I7, I8, O any](fptr *func(I0, I1, I2, I3, I4, I5, I6, I7, I8) O, cfn uintptr) { - ty := reflect.ValueOf(fptr).Elem().Type() // Prevent too many registers and check func address is okay - argsCheck(ty, cfn) + argsCheck(fptr, cfn) returnFunc := getReturnFunc[O]() func0 := getAddFunc[I0]() func1 := getAddFunc[I1]() @@ -875,19 +882,3 @@ func RegisterFunc9_1[I0, I1, I2, I3, I4, I5, I6, I7, I8, O any](fptr *func(I0, I return returnFunc(r1, r2) } } - -func numOfIntegerRegisters() int { - switch runtime.GOARCH { - case "arm64": - return 8 - case "amd64": - return 6 - // TODO: figure out why 386 tests are not working - /*case "386": - return 0 - case "arm": - return 4*/ - default: - panic("purego: unknown GOARCH (" + runtime.GOARCH + ")") - } -} diff --git a/func_test.go b/func_test.go index 091afb38..9cd49f80 100644 --- a/func_test.go +++ b/func_test.go @@ -183,7 +183,7 @@ func Benchmark_qsort(b *testing.B) { } }) // New - b.Run("RegisterFunc1_0(new)", func(b *testing.B) { + b.Run("RegisterFunc4_0(new)", func(b *testing.B) { // 648578, 1806 ns/op, 296 B/op, 4 allocs/op library, err := getSystemLibrary() if err != nil {