Add Goroutine stack inspector to admin/monitor (#19207)
Continues on from #19202. Following the addition of pprof labels, we can now more easily understand the relationship between goroutines and the requests that spawn them. This PR takes advantage of the labels and adds a few others, then provides a mechanism for the monitoring page to query the pprof goroutine profile. The binary profile that results from this query is immediately piped into the Google library for parsing it, and stack traces are then formed for the goroutines. If a goroutine is within a context, or has been created from a goroutine within a process context, it will acquire the process description labels for that process. The goroutines are mapped to their associated pids, and any that do not have an associated pid are placed in a group at the bottom as unbound. In this way we should be able to more easily examine goroutines that have been stuck. A manager command `gitea manager processes` is also provided that can export the processes (with or without stacktraces) to the command line. Signed-off-by: Andrew Thornton <art27@cantab.net>
This commit is contained in:
parent
9c349a4277
commit
c88547ce71
48 changed files with 1479 additions and 595 deletions
|
@ -5,10 +5,12 @@
|
|||
package nosql
|
||||
|
||||
import (
|
||||
"context"
|
||||
"strconv"
|
||||
"sync"
|
||||
"time"
|
||||
|
||||
"code.gitea.io/gitea/modules/process"
|
||||
"github.com/go-redis/redis/v8"
|
||||
"github.com/syndtr/goleveldb/leveldb"
|
||||
)
|
||||
|
@ -17,7 +19,9 @@ var manager *Manager
|
|||
|
||||
// Manager is the nosql connection manager
|
||||
type Manager struct {
|
||||
mutex sync.Mutex
|
||||
ctx context.Context
|
||||
finished context.CancelFunc
|
||||
mutex sync.Mutex
|
||||
|
||||
RedisConnections map[string]*redisClientHolder
|
||||
LevelDBConnections map[string]*levelDBHolder
|
||||
|
@ -46,7 +50,10 @@ func init() {
|
|||
// GetManager returns a Manager and initializes one as a singleton if there's none yet
|
||||
func GetManager() *Manager {
|
||||
if manager == nil {
|
||||
ctx, _, finished := process.GetManager().AddTypedContext(context.Background(), "Service: NoSQL", process.SystemProcessType, false)
|
||||
manager = &Manager{
|
||||
ctx: ctx,
|
||||
finished: finished,
|
||||
RedisConnections: make(map[string]*redisClientHolder),
|
||||
LevelDBConnections: make(map[string]*levelDBHolder),
|
||||
}
|
||||
|
|
|
@ -7,6 +7,7 @@ package nosql
|
|||
import (
|
||||
"fmt"
|
||||
"path"
|
||||
"runtime/pprof"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
|
@ -50,7 +51,31 @@ func (m *Manager) CloseLevelDB(connection string) error {
|
|||
}
|
||||
|
||||
// GetLevelDB gets a levelDB for a particular connection
|
||||
func (m *Manager) GetLevelDB(connection string) (*leveldb.DB, error) {
|
||||
func (m *Manager) GetLevelDB(connection string) (db *leveldb.DB, err error) {
|
||||
// Because we want to associate any goroutines created by this call with the main nosqldb context we need to
|
||||
// wrap this in a goroutine labelled with the nosqldb context
|
||||
done := make(chan struct{})
|
||||
var recovered interface{}
|
||||
go func() {
|
||||
defer func() {
|
||||
recovered = recover()
|
||||
if recovered != nil {
|
||||
log.Critical("PANIC during GetLevelDB: %v\nStacktrace: %s", recovered, log.Stack(2))
|
||||
}
|
||||
close(done)
|
||||
}()
|
||||
pprof.SetGoroutineLabels(m.ctx)
|
||||
|
||||
db, err = m.getLevelDB(connection)
|
||||
}()
|
||||
<-done
|
||||
if recovered != nil {
|
||||
panic(recovered)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (m *Manager) getLevelDB(connection string) (*leveldb.DB, error) {
|
||||
// Convert the provided connection description to the common format
|
||||
uri := ToLevelDBURI(connection)
|
||||
|
||||
|
@ -168,15 +193,18 @@ func (m *Manager) GetLevelDB(connection string) (*leveldb.DB, error) {
|
|||
if err != nil {
|
||||
if !errors.IsCorrupted(err) {
|
||||
if strings.Contains(err.Error(), "resource temporarily unavailable") {
|
||||
return nil, fmt.Errorf("unable to lock level db at %s: %w", dataDir, err)
|
||||
err = fmt.Errorf("unable to lock level db at %s: %w", dataDir, err)
|
||||
return nil, err
|
||||
}
|
||||
|
||||
return nil, fmt.Errorf("unable to open level db at %s: %w", dataDir, err)
|
||||
}
|
||||
db.db, err = leveldb.RecoverFile(dataDir, opts)
|
||||
if err != nil {
|
||||
err = fmt.Errorf("unable to open level db at %s: %w", dataDir, err)
|
||||
return nil, err
|
||||
}
|
||||
db.db, err = leveldb.RecoverFile(dataDir, opts)
|
||||
}
|
||||
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
|
||||
for _, name := range db.name {
|
||||
|
|
|
@ -8,6 +8,7 @@ import (
|
|||
"crypto/tls"
|
||||
"net/url"
|
||||
"path"
|
||||
"runtime/pprof"
|
||||
"strconv"
|
||||
"strings"
|
||||
|
||||
|
@ -43,7 +44,31 @@ func (m *Manager) CloseRedisClient(connection string) error {
|
|||
}
|
||||
|
||||
// GetRedisClient gets a redis client for a particular connection
|
||||
func (m *Manager) GetRedisClient(connection string) redis.UniversalClient {
|
||||
func (m *Manager) GetRedisClient(connection string) (client redis.UniversalClient) {
|
||||
// Because we want to associate any goroutines created by this call with the main nosqldb context we need to
|
||||
// wrap this in a goroutine labelled with the nosqldb context
|
||||
done := make(chan struct{})
|
||||
var recovered interface{}
|
||||
go func() {
|
||||
defer func() {
|
||||
recovered = recover()
|
||||
if recovered != nil {
|
||||
log.Critical("PANIC during GetRedisClient: %v\nStacktrace: %s", recovered, log.Stack(2))
|
||||
}
|
||||
close(done)
|
||||
}()
|
||||
pprof.SetGoroutineLabels(m.ctx)
|
||||
|
||||
client = m.getRedisClient(connection)
|
||||
}()
|
||||
<-done
|
||||
if recovered != nil {
|
||||
panic(recovered)
|
||||
}
|
||||
return
|
||||
}
|
||||
|
||||
func (m *Manager) getRedisClient(connection string) redis.UniversalClient {
|
||||
m.mutex.Lock()
|
||||
defer m.mutex.Unlock()
|
||||
client, ok := m.RedisConnections[connection]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue