clientconn.go 42 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
37047057067077087097107117127137147157167177187197207217227237247257267277287297307317327337347357367377387397407417427437447457467477487497507517527537547557567577587597607617627637647657667677687697707717727737747757767777787797807817827837847857867877887897907917927937947957967977987998008018028038048058068078088098108118128138148158168178188198208218228238248258268278288298308318328338348358368378388398408418428438448458468478488498508518528538548558568578588598608618628638648658668678688698708718728738748758768778788798808818828838848858868878888898908918928938948958968978988999009019029039049059069079089099109119129139149159169179189199209219229239249259269279289299309319329339349359369379389399409419429439449459469479489499509519529539549559569579589599609619629639649659669679689699709719729739749759769779789799809819829839849859869879889899909919929939949959969979989991000100110021003100410051006100710081009101010111012101310141015101610171018101910201021102210231024102510261027102810291030103110321033103410351036103710381039104010411042104310441045104610471048104910501051105210531054105510561057105810591060106110621063106410651066106710681069107010711072107310741075107610771078107910801081108210831084108510861087108810891090109110921093109410951096109710981099110011011102110311041105110611071108110911101111111211131114111511161117111811191120112111221123112411251126112711281129113011311132113311341135113611371138113911401141114211431144114511461147114811491150115111521153115411551156115711581159116011611162116311641165116611671168116911701171117211731174117511761177117811791180118111821183118411851186118711881189119011911192119311941195119611971198119912001201120212031204120512061207120812091210121112121213121412151216121712181219122012211222122312241225122612271228122912301231123212331234123512361237123812391240124112421243124412451246124712481249125012511252125312541255125612571258125912601261126212631264126512661267126812691270127112721273127412751276127
7127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316131713181319132013211322132313241325132613271328132913301331133213331334133513361337133813391340134113421343134413451346134713481349135013511352135313541355135613571358135913601361136213631364136513661367136813691370137113721373137413751376137713781379138013811382138313841385138613871388138913901391139213931394139513961397139813991400140114021403140414051406140714081409141014111412141314141415141614171418141914201421142214231424142514261427142814291430143114321433143414351436143714381439144014411442144314441445
  1. /*
  2. *
  3. * Copyright 2014 gRPC authors.
  4. *
  5. * Licensed under the Apache License, Version 2.0 (the "License");
  6. * you may not use this file except in compliance with the License.
  7. * You may obtain a copy of the License at
  8. *
  9. * http://www.apache.org/licenses/LICENSE-2.0
  10. *
  11. * Unless required by applicable law or agreed to in writing, software
  12. * distributed under the License is distributed on an "AS IS" BASIS,
  13. * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
  14. * See the License for the specific language governing permissions and
  15. * limitations under the License.
  16. *
  17. */
  18. package grpc
  19. import (
  20. "context"
  21. "errors"
  22. "fmt"
  23. "math"
  24. "net"
  25. "reflect"
  26. "strings"
  27. "sync"
  28. "sync/atomic"
  29. "time"
  30. "google.golang.org/grpc/balancer"
  31. _ "google.golang.org/grpc/balancer/roundrobin" // To register roundrobin.
  32. "google.golang.org/grpc/codes"
  33. "google.golang.org/grpc/connectivity"
  34. "google.golang.org/grpc/credentials"
  35. "google.golang.org/grpc/grpclog"
  36. "google.golang.org/grpc/internal/backoff"
  37. "google.golang.org/grpc/internal/channelz"
  38. "google.golang.org/grpc/internal/envconfig"
  39. "google.golang.org/grpc/internal/grpcsync"
  40. "google.golang.org/grpc/internal/transport"
  41. "google.golang.org/grpc/keepalive"
  42. "google.golang.org/grpc/metadata"
  43. "google.golang.org/grpc/resolver"
  44. _ "google.golang.org/grpc/resolver/dns" // To register dns resolver.
  45. _ "google.golang.org/grpc/resolver/passthrough" // To register passthrough resolver.
  46. "google.golang.org/grpc/status"
  47. )
  const (
  	// minConnectTimeout is the minimum time to give a connection to complete.
  	minConnectTimeout = 20 * time.Second
  	// grpclbName is the name of the grpclb balancer. It must match
  	// grpclbName in grpclb/grpclb.go.
  	grpclbName = "grpclb"
  )
  var (
  	// ErrClientConnClosing indicates that the operation is illegal because
  	// the ClientConn is closing.
  	//
  	// Deprecated: this error should not be relied upon by users; use the status
  	// code of Canceled instead.
  	ErrClientConnClosing = status.Error(codes.Canceled, "grpc: the client connection is closing")
  	// errConnDrain indicates that the connection has started draining and
  	// does not accept any new RPCs.
  	errConnDrain = errors.New("grpc: the connection is drained")
  	// errConnClosing indicates that the connection is closing.
  	errConnClosing = errors.New("grpc: the connection is closing")
  	// errBalancerClosed indicates that the balancer is closed.
  	errBalancerClosed = errors.New("grpc: balancer is closed")
  	// getMinConnectTimeout returns minConnectTimeout. It is an accessor held
  	// in a variable so that the value can be atomically read and updated
  	// while testing.
  	getMinConnectTimeout = func() time.Duration {
  		return minConnectTimeout
  	}
  )
  // The following errors are returned from Dial and DialContext.
  var (
  	// errNoTransportSecurity indicates that no transport security has been
  	// set for the ClientConn. Users should either set one or explicitly
  	// call the WithInsecure DialOption to disable security.
  	errNoTransportSecurity = errors.New("grpc: no transport security set (use grpc.WithInsecure() explicitly or set credentials)")
  	// errTransportCredsAndBundle indicates that a credentials bundle is used
  	// together with individual TransportCredentials.
  	errTransportCredsAndBundle = errors.New("grpc: credentials.Bundle may not be used with individual TransportCredentials")
  	// errTransportCredentialsMissing indicates that per-RPC credentials which
  	// require transport security (e.g., an OAuth2 token) were configured on
  	// an insecure connection.
  	errTransportCredentialsMissing = errors.New("grpc: the credentials require transport level security (use grpc.WithTransportCredentials() to set)")
  	// errCredentialsConflict indicates that grpc.WithTransportCredentials()
  	// and grpc.WithInsecure() are both called for a connection.
  	errCredentialsConflict = errors.New("grpc: transport credentials are set for an insecure connection (grpc.WithTransportCredentials() and grpc.WithInsecure() are both called)")
  )
  const (
  	// defaultClientMaxReceiveMessageSize is 4 MiB.
  	defaultClientMaxReceiveMessageSize = 1024 * 1024 * 4
  	// defaultClientMaxSendMessageSize is the largest value an int32 can hold.
  	defaultClientMaxSendMessageSize = math.MaxInt32
  	// defaultWriteBufSize specifies the buffer size for sending frames (32 KiB).
  	defaultWriteBufSize = 32 * 1024
  	// defaultReadBufSize is the read-side counterpart (32 KiB).
  	// NOTE(review): presumably the buffer size for receiving frames — confirm
  	// against the transport package.
  	defaultReadBufSize = 32 * 1024
  )
  97. // Dial creates a client connection to the given target.
  98. func Dial(target string, opts ...DialOption) (*ClientConn, error) {
  99. return DialContext(context.Background(), target, opts...)
  100. }
  101. // DialContext creates a client connection to the given target. By default, it's
  102. // a non-blocking dial (the function won't wait for connections to be
  103. // established, and connecting happens in the background). To make it a blocking
  104. // dial, use WithBlock() dial option.
  105. //
  106. // In the non-blocking case, the ctx does not act against the connection. It
  107. // only controls the setup steps.
  108. //
  109. // In the blocking case, ctx can be used to cancel or expire the pending
  110. // connection. Once this function returns, the cancellation and expiration of
  111. // ctx will be noop. Users should call ClientConn.Close to terminate all the
  112. // pending operations after this function returns.
  113. //
  114. // The target name syntax is defined in
  115. // https://github.com/grpc/grpc/blob/master/doc/naming.md.
  116. // e.g. to use dns resolver, a "dns:///" prefix should be applied to the target.
  117. func DialContext(ctx context.Context, target string, opts ...DialOption) (conn *ClientConn, err error) {
  118. cc := &ClientConn{
  119. target: target,
  120. csMgr: &connectivityStateManager{},
  121. conns: make(map[*addrConn]struct{}),
  122. dopts: defaultDialOptions(),
  123. blockingpicker: newPickerWrapper(),
  124. czData: new(channelzData),
  125. firstResolveEvent: grpcsync.NewEvent(),
  126. }
  127. cc.retryThrottler.Store((*retryThrottler)(nil))
  128. cc.ctx, cc.cancel = context.WithCancel(context.Background())
  129. for _, opt := range opts {
  130. opt.apply(&cc.dopts)
  131. }
  132. if channelz.IsOn() {
  133. if cc.dopts.channelzParentID != 0 {
  134. cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, cc.dopts.channelzParentID, target)
  135. channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
  136. Desc: "Channel Created",
  137. Severity: channelz.CtINFO,
  138. Parent: &channelz.TraceEventDesc{
  139. Desc: fmt.Sprintf("Nested Channel(id:%d) created", cc.channelzID),
  140. Severity: channelz.CtINFO,
  141. },
  142. })
  143. } else {
  144. cc.channelzID = channelz.RegisterChannel(&channelzChannel{cc}, 0, target)
  145. channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
  146. Desc: "Channel Created",
  147. Severity: channelz.CtINFO,
  148. })
  149. }
  150. cc.csMgr.channelzID = cc.channelzID
  151. }
  152. if !cc.dopts.insecure {
  153. if cc.dopts.copts.TransportCredentials == nil && cc.dopts.copts.CredsBundle == nil {
  154. return nil, errNoTransportSecurity
  155. }
  156. if cc.dopts.copts.TransportCredentials != nil && cc.dopts.copts.CredsBundle != nil {
  157. return nil, errTransportCredsAndBundle
  158. }
  159. } else {
  160. if cc.dopts.copts.TransportCredentials != nil || cc.dopts.copts.CredsBundle != nil {
  161. return nil, errCredentialsConflict
  162. }
  163. for _, cd := range cc.dopts.copts.PerRPCCredentials {
  164. if cd.RequireTransportSecurity() {
  165. return nil, errTransportCredentialsMissing
  166. }
  167. }
  168. }
  169. cc.mkp = cc.dopts.copts.KeepaliveParams
  170. if cc.dopts.copts.Dialer == nil {
  171. cc.dopts.copts.Dialer = newProxyDialer(
  172. func(ctx context.Context, addr string) (net.Conn, error) {
  173. network, addr := parseDialTarget(addr)
  174. return (&net.Dialer{}).DialContext(ctx, network, addr)
  175. },
  176. )
  177. }
  178. if cc.dopts.copts.UserAgent != "" {
  179. cc.dopts.copts.UserAgent += " " + grpcUA
  180. } else {
  181. cc.dopts.copts.UserAgent = grpcUA
  182. }
  183. if cc.dopts.timeout > 0 {
  184. var cancel context.CancelFunc
  185. ctx, cancel = context.WithTimeout(ctx, cc.dopts.timeout)
  186. defer cancel()
  187. }
  188. defer func() {
  189. select {
  190. case <-ctx.Done():
  191. conn, err = nil, ctx.Err()
  192. default:
  193. }
  194. if err != nil {
  195. cc.Close()
  196. }
  197. }()
  198. scSet := false
  199. if cc.dopts.scChan != nil {
  200. // Try to get an initial service config.
  201. select {
  202. case sc, ok := <-cc.dopts.scChan:
  203. if ok {
  204. cc.sc = sc
  205. scSet = true
  206. }
  207. default:
  208. }
  209. }
  210. if cc.dopts.bs == nil {
  211. cc.dopts.bs = backoff.Exponential{
  212. MaxDelay: DefaultBackoffConfig.MaxDelay,
  213. }
  214. }
  215. if cc.dopts.resolverBuilder == nil {
  216. // Only try to parse target when resolver builder is not already set.
  217. cc.parsedTarget = parseTarget(cc.target)
  218. grpclog.Infof("parsed scheme: %q", cc.parsedTarget.Scheme)
  219. cc.dopts.resolverBuilder = resolver.Get(cc.parsedTarget.Scheme)
  220. if cc.dopts.resolverBuilder == nil {
  221. // If resolver builder is still nil, the parsed target's scheme is
  222. // not registered. Fallback to default resolver and set Endpoint to
  223. // the original target.
  224. grpclog.Infof("scheme %q not registered, fallback to default scheme", cc.parsedTarget.Scheme)
  225. cc.parsedTarget = resolver.Target{
  226. Scheme: resolver.GetDefaultScheme(),
  227. Endpoint: target,
  228. }
  229. cc.dopts.resolverBuilder = resolver.Get(cc.parsedTarget.Scheme)
  230. }
  231. } else {
  232. cc.parsedTarget = resolver.Target{Endpoint: target}
  233. }
  234. creds := cc.dopts.copts.TransportCredentials
  235. if creds != nil && creds.Info().ServerName != "" {
  236. cc.authority = creds.Info().ServerName
  237. } else if cc.dopts.insecure && cc.dopts.authority != "" {
  238. cc.authority = cc.dopts.authority
  239. } else {
  240. // Use endpoint from "scheme://authority/endpoint" as the default
  241. // authority for ClientConn.
  242. cc.authority = cc.parsedTarget.Endpoint
  243. }
  244. if cc.dopts.scChan != nil && !scSet {
  245. // Blocking wait for the initial service config.
  246. select {
  247. case sc, ok := <-cc.dopts.scChan:
  248. if ok {
  249. cc.sc = sc
  250. }
  251. case <-ctx.Done():
  252. return nil, ctx.Err()
  253. }
  254. }
  255. if cc.dopts.scChan != nil {
  256. go cc.scWatcher()
  257. }
  258. var credsClone credentials.TransportCredentials
  259. if creds := cc.dopts.copts.TransportCredentials; creds != nil {
  260. credsClone = creds.Clone()
  261. }
  262. cc.balancerBuildOpts = balancer.BuildOptions{
  263. DialCreds: credsClone,
  264. CredsBundle: cc.dopts.copts.CredsBundle,
  265. Dialer: cc.dopts.copts.Dialer,
  266. ChannelzParentID: cc.channelzID,
  267. }
  268. // Build the resolver.
  269. rWrapper, err := newCCResolverWrapper(cc)
  270. if err != nil {
  271. return nil, fmt.Errorf("failed to build resolver: %v", err)
  272. }
  273. cc.mu.Lock()
  274. cc.resolverWrapper = rWrapper
  275. cc.mu.Unlock()
  276. // A blocking dial blocks until the clientConn is ready.
  277. if cc.dopts.block {
  278. for {
  279. s := cc.GetState()
  280. if s == connectivity.Ready {
  281. break
  282. } else if cc.dopts.copts.FailOnNonTempDialError && s == connectivity.TransientFailure {
  283. if err = cc.blockingpicker.connectionError(); err != nil {
  284. terr, ok := err.(interface {
  285. Temporary() bool
  286. })
  287. if ok && !terr.Temporary() {
  288. return nil, err
  289. }
  290. }
  291. }
  292. if !cc.WaitForStateChange(ctx, s) {
  293. // ctx got timeout or canceled.
  294. return nil, ctx.Err()
  295. }
  296. }
  297. }
  298. return cc, nil
  299. }
  // connectivityStateManager keeps the connectivity.State of ClientConn.
  // This struct will eventually be exported so the balancers can access it.
  type connectivityStateManager struct {
  	// mu is held by updateState, getState and getNotifyChan while they
  	// access state and notifyChan.
  	mu sync.Mutex
  	// state is the most recently recorded connectivity state.
  	state connectivity.State
  	// notifyChan is created lazily by getNotifyChan and is closed and reset
  	// to nil by updateState when the state changes.
  	notifyChan chan struct{}
  	// channelzID is the channelz ID under which updateState records
  	// state-change trace events.
  	channelzID int64
  }
  308. // updateState updates the connectivity.State of ClientConn.
  309. // If there's a change it notifies goroutines waiting on state change to
  310. // happen.
  311. func (csm *connectivityStateManager) updateState(state connectivity.State) {
  312. csm.mu.Lock()
  313. defer csm.mu.Unlock()
  314. if csm.state == connectivity.Shutdown {
  315. return
  316. }
  317. if csm.state == state {
  318. return
  319. }
  320. csm.state = state
  321. if channelz.IsOn() {
  322. channelz.AddTraceEvent(csm.channelzID, &channelz.TraceEventDesc{
  323. Desc: fmt.Sprintf("Channel Connectivity change to %v", state),
  324. Severity: channelz.CtINFO,
  325. })
  326. }
  327. if csm.notifyChan != nil {
  328. // There are other goroutines waiting on this channel.
  329. close(csm.notifyChan)
  330. csm.notifyChan = nil
  331. }
  332. }
  333. func (csm *connectivityStateManager) getState() connectivity.State {
  334. csm.mu.Lock()
  335. defer csm.mu.Unlock()
  336. return csm.state
  337. }
  338. func (csm *connectivityStateManager) getNotifyChan() <-chan struct{} {
  339. csm.mu.Lock()
  340. defer csm.mu.Unlock()
  341. if csm.notifyChan == nil {
  342. csm.notifyChan = make(chan struct{})
  343. }
  344. return csm.notifyChan
  345. }
  // ClientConn represents a client connection to an RPC server.
  type ClientConn struct {
  	// ctx and cancel are created in DialContext from context.Background();
  	// ctx is not derived from the context passed to DialContext.
  	ctx    context.Context
  	cancel context.CancelFunc

  	// target is the target string as passed to Dial/DialContext.
  	target string
  	// parsedTarget is set in DialContext, either by parsing target or as a
  	// fallback built around the raw target string.
  	parsedTarget resolver.Target
  	// authority is chosen in DialContext from the transport credentials, the
  	// authority dial option, or the parsed target's endpoint.
  	authority string
  	// dopts holds the dial options after all DialOptions have been applied.
  	dopts dialOptions
  	// csMgr tracks the connectivity state reported by GetState.
  	csMgr *connectivityStateManager

  	// balancerBuildOpts is passed to newCCBalancerWrapper when a balancer is built.
  	balancerBuildOpts balancer.BuildOptions
  	// blockingpicker is what getTransport picks a transport from.
  	blockingpicker *pickerWrapper

  	// mu is held by the methods in this file while they access
  	// resolverWrapper, sc, scRaw, conns, the balancer names, curAddresses
  	// and balancerWrapper.
  	// NOTE(review): the exact set of guarded fields is inferred from usage —
  	// confirm.
  	mu              sync.RWMutex
  	resolverWrapper *ccResolverWrapper
  	// sc is the current service config; scRaw is the JSON string it was
  	// parsed from, or "" when sc arrived through the scChan dial option.
  	sc    ServiceConfig
  	scRaw string
  	// conns is the set of addrConns owned by this ClientConn. Methods in
  	// this file treat a nil map as "the ClientConn was closed".
  	conns map[*addrConn]struct{}
  	// Keepalive parameter can be updated if a GoAway is received.
  	mkp             keepalive.ClientParameters
  	curBalancerName string
  	preBalancerName string // previous balancer name.
  	// curAddresses is the last address list seen by handleResolvedAddrs.
  	curAddresses    []resolver.Address
  	balancerWrapper *ccBalancerWrapper
  	// retryThrottler stores a *retryThrottler, possibly a typed nil.
  	retryThrottler atomic.Value

  	// firstResolveEvent is fired by handleResolvedAddrs and awaited by
  	// waitForResolvedAddrs.
  	firstResolveEvent *grpcsync.Event

  	channelzID int64 // channelz unique identification number
  	// czData holds the call counters reported by channelzMetric.
  	czData *channelzData
  }
  373. // WaitForStateChange waits until the connectivity.State of ClientConn changes from sourceState or
  374. // ctx expires. A true value is returned in former case and false in latter.
  375. // This is an EXPERIMENTAL API.
  376. func (cc *ClientConn) WaitForStateChange(ctx context.Context, sourceState connectivity.State) bool {
  377. ch := cc.csMgr.getNotifyChan()
  378. if cc.csMgr.getState() != sourceState {
  379. return true
  380. }
  381. select {
  382. case <-ctx.Done():
  383. return false
  384. case <-ch:
  385. return true
  386. }
  387. }
  388. // GetState returns the connectivity.State of ClientConn.
  389. // This is an EXPERIMENTAL API.
  390. func (cc *ClientConn) GetState() connectivity.State {
  391. return cc.csMgr.getState()
  392. }
  393. func (cc *ClientConn) scWatcher() {
  394. for {
  395. select {
  396. case sc, ok := <-cc.dopts.scChan:
  397. if !ok {
  398. return
  399. }
  400. cc.mu.Lock()
  401. // TODO: load balance policy runtime change is ignored.
  402. // We may revisit this decision in the future.
  403. cc.sc = sc
  404. cc.scRaw = ""
  405. cc.mu.Unlock()
  406. case <-cc.ctx.Done():
  407. return
  408. }
  409. }
  410. }
  411. // waitForResolvedAddrs blocks until the resolver has provided addresses or the
  412. // context expires. Returns nil unless the context expires first; otherwise
  413. // returns a status error based on the context.
  414. func (cc *ClientConn) waitForResolvedAddrs(ctx context.Context) error {
  415. // This is on the RPC path, so we use a fast path to avoid the
  416. // more-expensive "select" below after the resolver has returned once.
  417. if cc.firstResolveEvent.HasFired() {
  418. return nil
  419. }
  420. select {
  421. case <-cc.firstResolveEvent.Done():
  422. return nil
  423. case <-ctx.Done():
  424. return status.FromContextError(ctx.Err()).Err()
  425. case <-cc.ctx.Done():
  426. return ErrClientConnClosing
  427. }
  428. }
  429. func (cc *ClientConn) handleResolvedAddrs(addrs []resolver.Address, err error) {
  430. cc.mu.Lock()
  431. defer cc.mu.Unlock()
  432. if cc.conns == nil {
  433. // cc was closed.
  434. return
  435. }
  436. if reflect.DeepEqual(cc.curAddresses, addrs) {
  437. return
  438. }
  439. cc.curAddresses = addrs
  440. cc.firstResolveEvent.Fire()
  441. if cc.dopts.balancerBuilder == nil {
  442. // Only look at balancer types and switch balancer if balancer dial
  443. // option is not set.
  444. var isGRPCLB bool
  445. for _, a := range addrs {
  446. if a.Type == resolver.GRPCLB {
  447. isGRPCLB = true
  448. break
  449. }
  450. }
  451. var newBalancerName string
  452. if isGRPCLB {
  453. newBalancerName = grpclbName
  454. } else {
  455. // Address list doesn't contain grpclb address. Try to pick a
  456. // non-grpclb balancer.
  457. newBalancerName = cc.curBalancerName
  458. // If current balancer is grpclb, switch to the previous one.
  459. if newBalancerName == grpclbName {
  460. newBalancerName = cc.preBalancerName
  461. }
  462. // The following could be true in two cases:
  463. // - the first time handling resolved addresses
  464. // (curBalancerName="")
  465. // - the first time handling non-grpclb addresses
  466. // (curBalancerName="grpclb", preBalancerName="")
  467. if newBalancerName == "" {
  468. newBalancerName = PickFirstBalancerName
  469. }
  470. }
  471. cc.switchBalancer(newBalancerName)
  472. } else if cc.balancerWrapper == nil {
  473. // Balancer dial option was set, and this is the first time handling
  474. // resolved addresses. Build a balancer with dopts.balancerBuilder.
  475. cc.balancerWrapper = newCCBalancerWrapper(cc, cc.dopts.balancerBuilder, cc.balancerBuildOpts)
  476. }
  477. cc.balancerWrapper.handleResolvedAddrs(addrs, nil)
  478. }
  479. // switchBalancer starts the switching from current balancer to the balancer
  480. // with the given name.
  481. //
  482. // It will NOT send the current address list to the new balancer. If needed,
  483. // caller of this function should send address list to the new balancer after
  484. // this function returns.
  485. //
  486. // Caller must hold cc.mu.
  487. func (cc *ClientConn) switchBalancer(name string) {
  488. if cc.conns == nil {
  489. return
  490. }
  491. if strings.ToLower(cc.curBalancerName) == strings.ToLower(name) {
  492. return
  493. }
  494. grpclog.Infof("ClientConn switching balancer to %q", name)
  495. if cc.dopts.balancerBuilder != nil {
  496. grpclog.Infoln("ignoring balancer switching: Balancer DialOption used instead")
  497. return
  498. }
  499. // TODO(bar switching) change this to two steps: drain and close.
  500. // Keep track of sc in wrapper.
  501. if cc.balancerWrapper != nil {
  502. cc.balancerWrapper.close()
  503. }
  504. builder := balancer.Get(name)
  505. // TODO(yuxuanli): If user send a service config that does not contain a valid balancer name, should
  506. // we reuse previous one?
  507. if channelz.IsOn() {
  508. if builder == nil {
  509. channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
  510. Desc: fmt.Sprintf("Channel switches to new LB policy %q due to fallback from invalid balancer name", PickFirstBalancerName),
  511. Severity: channelz.CtWarning,
  512. })
  513. } else {
  514. channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
  515. Desc: fmt.Sprintf("Channel switches to new LB policy %q", name),
  516. Severity: channelz.CtINFO,
  517. })
  518. }
  519. }
  520. if builder == nil {
  521. grpclog.Infof("failed to get balancer builder for: %v, using pick_first instead", name)
  522. builder = newPickfirstBuilder()
  523. }
  524. cc.preBalancerName = cc.curBalancerName
  525. cc.curBalancerName = builder.Name()
  526. cc.balancerWrapper = newCCBalancerWrapper(cc, builder, cc.balancerBuildOpts)
  527. }
  528. func (cc *ClientConn) handleSubConnStateChange(sc balancer.SubConn, s connectivity.State) {
  529. cc.mu.Lock()
  530. if cc.conns == nil {
  531. cc.mu.Unlock()
  532. return
  533. }
  534. // TODO(bar switching) send updates to all balancer wrappers when balancer
  535. // gracefully switching is supported.
  536. cc.balancerWrapper.handleSubConnStateChange(sc, s)
  537. cc.mu.Unlock()
  538. }
  539. // newAddrConn creates an addrConn for addrs and adds it to cc.conns.
  540. //
  541. // Caller needs to make sure len(addrs) > 0.
  542. func (cc *ClientConn) newAddrConn(addrs []resolver.Address, opts balancer.NewSubConnOptions) (*addrConn, error) {
  543. ac := &addrConn{
  544. cc: cc,
  545. addrs: addrs,
  546. scopts: opts,
  547. dopts: cc.dopts,
  548. czData: new(channelzData),
  549. resetBackoff: make(chan struct{}),
  550. }
  551. ac.ctx, ac.cancel = context.WithCancel(cc.ctx)
  552. // Track ac in cc. This needs to be done before any getTransport(...) is called.
  553. cc.mu.Lock()
  554. if cc.conns == nil {
  555. cc.mu.Unlock()
  556. return nil, ErrClientConnClosing
  557. }
  558. if channelz.IsOn() {
  559. ac.channelzID = channelz.RegisterSubChannel(ac, cc.channelzID, "")
  560. channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
  561. Desc: "Subchannel Created",
  562. Severity: channelz.CtINFO,
  563. Parent: &channelz.TraceEventDesc{
  564. Desc: fmt.Sprintf("Subchannel(id:%d) created", ac.channelzID),
  565. Severity: channelz.CtINFO,
  566. },
  567. })
  568. }
  569. cc.conns[ac] = struct{}{}
  570. cc.mu.Unlock()
  571. return ac, nil
  572. }
  573. // removeAddrConn removes the addrConn in the subConn from clientConn.
  574. // It also tears down the ac with the given error.
  575. func (cc *ClientConn) removeAddrConn(ac *addrConn, err error) {
  576. cc.mu.Lock()
  577. if cc.conns == nil {
  578. cc.mu.Unlock()
  579. return
  580. }
  581. delete(cc.conns, ac)
  582. cc.mu.Unlock()
  583. ac.tearDown(err)
  584. }
  585. func (cc *ClientConn) channelzMetric() *channelz.ChannelInternalMetric {
  586. return &channelz.ChannelInternalMetric{
  587. State: cc.GetState(),
  588. Target: cc.target,
  589. CallsStarted: atomic.LoadInt64(&cc.czData.callsStarted),
  590. CallsSucceeded: atomic.LoadInt64(&cc.czData.callsSucceeded),
  591. CallsFailed: atomic.LoadInt64(&cc.czData.callsFailed),
  592. LastCallStartedTimestamp: time.Unix(0, atomic.LoadInt64(&cc.czData.lastCallStartedTime)),
  593. }
  594. }
  595. // Target returns the target string of the ClientConn.
  596. // This is an EXPERIMENTAL API.
  597. func (cc *ClientConn) Target() string {
  598. return cc.target
  599. }
  600. func (cc *ClientConn) incrCallsStarted() {
  601. atomic.AddInt64(&cc.czData.callsStarted, 1)
  602. atomic.StoreInt64(&cc.czData.lastCallStartedTime, time.Now().UnixNano())
  603. }
  604. func (cc *ClientConn) incrCallsSucceeded() {
  605. atomic.AddInt64(&cc.czData.callsSucceeded, 1)
  606. }
  607. func (cc *ClientConn) incrCallsFailed() {
  608. atomic.AddInt64(&cc.czData.callsFailed, 1)
  609. }
  610. // connect starts creating a transport.
  611. // It does nothing if the ac is not IDLE.
  612. // TODO(bar) Move this to the addrConn section.
  613. func (ac *addrConn) connect() error {
  614. ac.mu.Lock()
  615. if ac.state == connectivity.Shutdown {
  616. ac.mu.Unlock()
  617. return errConnClosing
  618. }
  619. if ac.state != connectivity.Idle {
  620. ac.mu.Unlock()
  621. return nil
  622. }
  623. ac.updateConnectivityState(connectivity.Connecting)
  624. ac.mu.Unlock()
  625. // Start a goroutine connecting to the server asynchronously.
  626. go ac.resetTransport()
  627. return nil
  628. }
  629. // tryUpdateAddrs tries to update ac.addrs with the new addresses list.
  630. //
  631. // It checks whether current connected address of ac is in the new addrs list.
  632. // - If true, it updates ac.addrs and returns true. The ac will keep using
  633. // the existing connection.
  634. // - If false, it does nothing and returns false.
  635. func (ac *addrConn) tryUpdateAddrs(addrs []resolver.Address) bool {
  636. ac.mu.Lock()
  637. defer ac.mu.Unlock()
  638. grpclog.Infof("addrConn: tryUpdateAddrs curAddr: %v, addrs: %v", ac.curAddr, addrs)
  639. if ac.state == connectivity.Shutdown {
  640. ac.addrs = addrs
  641. return true
  642. }
  643. // Unless we're busy reconnecting already, let's reconnect from the top of
  644. // the list.
  645. if ac.state != connectivity.Ready {
  646. return false
  647. }
  648. var curAddrFound bool
  649. for _, a := range addrs {
  650. if reflect.DeepEqual(ac.curAddr, a) {
  651. curAddrFound = true
  652. break
  653. }
  654. }
  655. grpclog.Infof("addrConn: tryUpdateAddrs curAddrFound: %v", curAddrFound)
  656. if curAddrFound {
  657. ac.addrs = addrs
  658. }
  659. return curAddrFound
  660. }
  661. // GetMethodConfig gets the method config of the input method.
  662. // If there's an exact match for input method (i.e. /service/method), we return
  663. // the corresponding MethodConfig.
  664. // If there isn't an exact match for the input method, we look for the default config
  665. // under the service (i.e /service/). If there is a default MethodConfig for
  666. // the service, we return it.
  667. // Otherwise, we return an empty MethodConfig.
  668. func (cc *ClientConn) GetMethodConfig(method string) MethodConfig {
  669. // TODO: Avoid the locking here.
  670. cc.mu.RLock()
  671. defer cc.mu.RUnlock()
  672. m, ok := cc.sc.Methods[method]
  673. if !ok {
  674. i := strings.LastIndex(method, "/")
  675. m = cc.sc.Methods[method[:i+1]]
  676. }
  677. return m
  678. }
  679. func (cc *ClientConn) healthCheckConfig() *healthCheckConfig {
  680. cc.mu.RLock()
  681. defer cc.mu.RUnlock()
  682. return cc.sc.healthCheckConfig
  683. }
  684. func (cc *ClientConn) getTransport(ctx context.Context, failfast bool, method string) (transport.ClientTransport, func(balancer.DoneInfo), error) {
  685. hdr, _ := metadata.FromOutgoingContext(ctx)
  686. t, done, err := cc.blockingpicker.pick(ctx, failfast, balancer.PickOptions{
  687. FullMethodName: method,
  688. Header: hdr,
  689. })
  690. if err != nil {
  691. return nil, nil, toRPCErr(err)
  692. }
  693. return t, done, nil
  694. }
  695. // handleServiceConfig parses the service config string in JSON format to Go native
  696. // struct ServiceConfig, and store both the struct and the JSON string in ClientConn.
  697. func (cc *ClientConn) handleServiceConfig(js string) error {
  698. if cc.dopts.disableServiceConfig {
  699. return nil
  700. }
  701. if cc.scRaw == js {
  702. return nil
  703. }
  704. if channelz.IsOn() {
  705. channelz.AddTraceEvent(cc.channelzID, &channelz.TraceEventDesc{
  706. // The special formatting of \"%s\" instead of %q is to provide nice printing of service config
  707. // for human consumption.
  708. Desc: fmt.Sprintf("Channel has a new service config \"%s\"", js),
  709. Severity: channelz.CtINFO,
  710. })
  711. }
  712. sc, err := parseServiceConfig(js)
  713. if err != nil {
  714. return err
  715. }
  716. cc.mu.Lock()
  717. // Check if the ClientConn is already closed. Some fields (e.g.
  718. // balancerWrapper) are set to nil when closing the ClientConn, and could
  719. // cause nil pointer panic if we don't have this check.
  720. if cc.conns == nil {
  721. cc.mu.Unlock()
  722. return nil
  723. }
  724. cc.scRaw = js
  725. cc.sc = sc
  726. if sc.retryThrottling != nil {
  727. newThrottler := &retryThrottler{
  728. tokens: sc.retryThrottling.MaxTokens,
  729. max: sc.retryThrottling.MaxTokens,
  730. thresh: sc.retryThrottling.MaxTokens / 2,
  731. ratio: sc.retryThrottling.TokenRatio,
  732. }
  733. cc.retryThrottler.Store(newThrottler)
  734. } else {
  735. cc.retryThrottler.Store((*retryThrottler)(nil))
  736. }
  737. if sc.LB != nil && *sc.LB != grpclbName { // "grpclb" is not a valid balancer option in service config.
  738. if cc.curBalancerName == grpclbName {
  739. // If current balancer is grpclb, there's at least one grpclb
  740. // balancer address in the resolved list. Don't switch the balancer,
  741. // but change the previous balancer name, so if a new resolved
  742. // address list doesn't contain grpclb address, balancer will be
  743. // switched to *sc.LB.
  744. cc.preBalancerName = *sc.LB
  745. } else {
  746. cc.switchBalancer(*sc.LB)
  747. cc.balancerWrapper.handleResolvedAddrs(cc.curAddresses, nil)
  748. }
  749. }
  750. cc.mu.Unlock()
  751. return nil
  752. }
  753. func (cc *ClientConn) resolveNow(o resolver.ResolveNowOption) {
  754. cc.mu.RLock()
  755. r := cc.resolverWrapper
  756. cc.mu.RUnlock()
  757. if r == nil {
  758. return
  759. }
  760. go r.resolveNow(o)
  761. }
  762. // ResetConnectBackoff wakes up all subchannels in transient failure and causes
  763. // them to attempt another connection immediately. It also resets the backoff
  764. // times used for subsequent attempts regardless of the current state.
  765. //
  766. // In general, this function should not be used. Typical service or network
  767. // outages result in a reasonable client reconnection strategy by default.
  768. // However, if a previously unavailable network becomes available, this may be
  769. // used to trigger an immediate reconnect.
  770. //
  771. // This API is EXPERIMENTAL.
  772. func (cc *ClientConn) ResetConnectBackoff() {
  773. cc.mu.Lock()
  774. defer cc.mu.Unlock()
  775. for ac := range cc.conns {
  776. ac.resetConnectBackoff()
  777. }
  778. }
  779. // Close tears down the ClientConn and all underlying connections.
  780. func (cc *ClientConn) Close() error {
  781. defer cc.cancel()
  782. cc.mu.Lock()
  783. if cc.conns == nil {
  784. cc.mu.Unlock()
  785. return ErrClientConnClosing
  786. }
  787. conns := cc.conns
  788. cc.conns = nil
  789. cc.csMgr.updateState(connectivity.Shutdown)
  790. rWrapper := cc.resolverWrapper
  791. cc.resolverWrapper = nil
  792. bWrapper := cc.balancerWrapper
  793. cc.balancerWrapper = nil
  794. cc.mu.Unlock()
  795. cc.blockingpicker.close()
  796. if rWrapper != nil {
  797. rWrapper.close()
  798. }
  799. if bWrapper != nil {
  800. bWrapper.close()
  801. }
  802. for ac := range conns {
  803. ac.tearDown(ErrClientConnClosing)
  804. }
  805. if channelz.IsOn() {
  806. ted := &channelz.TraceEventDesc{
  807. Desc: "Channel Deleted",
  808. Severity: channelz.CtINFO,
  809. }
  810. if cc.dopts.channelzParentID != 0 {
  811. ted.Parent = &channelz.TraceEventDesc{
  812. Desc: fmt.Sprintf("Nested channel(id:%d) deleted", cc.channelzID),
  813. Severity: channelz.CtINFO,
  814. }
  815. }
  816. channelz.AddTraceEvent(cc.channelzID, ted)
  817. // TraceEvent needs to be called before RemoveEntry, as TraceEvent may add trace reference to
  818. // the entity beng deleted, and thus prevent it from being deleted right away.
  819. channelz.RemoveEntry(cc.channelzID)
  820. }
  821. return nil
  822. }
  // addrConn is a network connection to a given address.
  //
  // NOTE(review): in the methods of this type, transport and the fields
  // declared after mu are read and written with mu held; confirm before relying
  // on this for fields not listed individually below.
  type addrConn struct {
  	// ctx is the parent of the per-attempt connect and health-check contexts;
  	// cancel is invoked by tearDown.
  	ctx    context.Context
  	cancel context.CancelFunc

  	cc     *ClientConn                // The ClientConn this addrConn reports state changes to.
  	dopts  dialOptions                // Dial options; copts.KeepaliveParams is refreshed from cc.mkp before each connect attempt.
  	acbw   balancer.SubConn           // Passed to cc.handleSubConnStateChange on every state change.
  	scopts balancer.NewSubConnOptions // Supplies CredsBundle and HealthCheckEnabled for connect attempts.

  	// transport is set when there's a viable transport (note: ac state may not be READY as LB channel
  	// health checking may require server to report healthy to set ac to READY), and is reset
  	// to nil when the current transport should no longer be used to create a stream (e.g. after GoAway
  	// is received, transport is closed, ac has been torn down).
  	transport transport.ClientTransport // The current transport.

  	mu      sync.Mutex
  	curAddr resolver.Address   // The current address.
  	addrs   []resolver.Address // All addresses that the resolver resolved to.

  	// Use updateConnectivityState for updating addrConn's connectivity state.
  	state connectivity.State

  	tearDownErr error // The reason this addrConn is torn down.

  	backoffIdx int // Needs to be stateful for resetConnectBackoff.
  	// resetBackoff is closed and replaced by resetConnectBackoff; the connect
  	// loop selects on it while waiting out a backoff period.
  	resetBackoff chan struct{}

  	channelzID int64 // channelz unique identification number.
  	// czData holds the call counters reported through ChannelzMetric; they
  	// are accessed with sync/atomic operations.
  	czData *channelzData
  }
  847. // Note: this requires a lock on ac.mu.
  848. func (ac *addrConn) updateConnectivityState(s connectivity.State) {
  849. if ac.state == s {
  850. return
  851. }
  852. updateMsg := fmt.Sprintf("Subchannel Connectivity change to %v", s)
  853. ac.state = s
  854. if channelz.IsOn() {
  855. channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
  856. Desc: updateMsg,
  857. Severity: channelz.CtINFO,
  858. })
  859. }
  860. ac.cc.handleSubConnStateChange(ac.acbw, s)
  861. }
  862. // adjustParams updates parameters used to create transports upon
  863. // receiving a GoAway.
  864. func (ac *addrConn) adjustParams(r transport.GoAwayReason) {
  865. switch r {
  866. case transport.GoAwayTooManyPings:
  867. v := 2 * ac.dopts.copts.KeepaliveParams.Time
  868. ac.cc.mu.Lock()
  869. if v > ac.cc.mkp.Time {
  870. ac.cc.mkp.Time = v
  871. }
  872. ac.cc.mu.Unlock()
  873. }
  874. }
  875. func (ac *addrConn) resetTransport() {
  876. for i := 0; ; i++ {
  877. tryNextAddrFromStart := grpcsync.NewEvent()
  878. ac.mu.Lock()
  879. if i > 0 {
  880. ac.cc.resolveNow(resolver.ResolveNowOption{})
  881. }
  882. addrs := ac.addrs
  883. backoffFor := ac.dopts.bs.Backoff(ac.backoffIdx)
  884. // This will be the duration that dial gets to finish.
  885. dialDuration := getMinConnectTimeout()
  886. if dialDuration < backoffFor {
  887. // Give dial more time as we keep failing to connect.
  888. dialDuration = backoffFor
  889. }
  890. connectDeadline := time.Now().Add(dialDuration)
  891. ac.mu.Unlock()
  892. addrLoop:
  893. for _, addr := range addrs {
  894. ac.mu.Lock()
  895. if ac.state == connectivity.Shutdown {
  896. ac.mu.Unlock()
  897. return
  898. }
  899. ac.updateConnectivityState(connectivity.Connecting)
  900. ac.transport = nil
  901. ac.cc.mu.RLock()
  902. ac.dopts.copts.KeepaliveParams = ac.cc.mkp
  903. ac.cc.mu.RUnlock()
  904. if ac.state == connectivity.Shutdown {
  905. ac.mu.Unlock()
  906. return
  907. }
  908. copts := ac.dopts.copts
  909. if ac.scopts.CredsBundle != nil {
  910. copts.CredsBundle = ac.scopts.CredsBundle
  911. }
  912. hctx, hcancel := context.WithCancel(ac.ctx)
  913. defer hcancel()
  914. ac.mu.Unlock()
  915. if channelz.IsOn() {
  916. channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
  917. Desc: fmt.Sprintf("Subchannel picks a new address %q to connect", addr.Addr),
  918. Severity: channelz.CtINFO,
  919. })
  920. }
  921. reconnect := grpcsync.NewEvent()
  922. prefaceReceived := make(chan struct{})
  923. newTr, err := ac.createTransport(addr, copts, connectDeadline, reconnect, prefaceReceived)
  924. if err == nil {
  925. ac.mu.Lock()
  926. ac.curAddr = addr
  927. ac.transport = newTr
  928. ac.mu.Unlock()
  929. healthCheckConfig := ac.cc.healthCheckConfig()
  930. // LB channel health checking is only enabled when all the four requirements below are met:
  931. // 1. it is not disabled by the user with the WithDisableHealthCheck DialOption,
  932. // 2. the internal.HealthCheckFunc is set by importing the grpc/healthcheck package,
  933. // 3. a service config with non-empty healthCheckConfig field is provided,
  934. // 4. the current load balancer allows it.
  935. healthcheckManagingState := false
  936. if !ac.cc.dopts.disableHealthCheck && healthCheckConfig != nil && ac.scopts.HealthCheckEnabled {
  937. if ac.cc.dopts.healthCheckFunc == nil {
  938. // TODO: add a link to the health check doc in the error message.
  939. grpclog.Error("the client side LB channel health check function has not been set.")
  940. } else {
  941. // TODO(deklerk) refactor to just return transport
  942. go ac.startHealthCheck(hctx, newTr, addr, healthCheckConfig.ServiceName)
  943. healthcheckManagingState = true
  944. }
  945. }
  946. if !healthcheckManagingState {
  947. ac.mu.Lock()
  948. ac.updateConnectivityState(connectivity.Ready)
  949. ac.mu.Unlock()
  950. }
  951. } else {
  952. hcancel()
  953. if err == errConnClosing {
  954. return
  955. }
  956. if tryNextAddrFromStart.HasFired() {
  957. break addrLoop
  958. }
  959. continue
  960. }
  961. ac.mu.Lock()
  962. reqHandshake := ac.dopts.reqHandshake
  963. ac.mu.Unlock()
  964. <-reconnect.Done()
  965. hcancel()
  966. if reqHandshake == envconfig.RequireHandshakeHybrid {
  967. // In RequireHandshakeHybrid mode, we must check to see whether
  968. // server preface has arrived yet to decide whether to start
  969. // reconnecting at the top of the list (server preface received)
  970. // or continue with the next addr in the list as if the
  971. // connection were not successful (server preface not received).
  972. select {
  973. case <-prefaceReceived:
  974. // We received a server preface - huzzah! We consider this
  975. // a success and restart from the top of the addr list.
  976. ac.mu.Lock()
  977. ac.backoffIdx = 0
  978. ac.mu.Unlock()
  979. break addrLoop
  980. default:
  981. // Despite having set state to READY, in hybrid mode we
  982. // consider this a failure and continue connecting at the
  983. // next addr in the list.
  984. ac.mu.Lock()
  985. if ac.state == connectivity.Shutdown {
  986. ac.mu.Unlock()
  987. return
  988. }
  989. ac.updateConnectivityState(connectivity.TransientFailure)
  990. ac.mu.Unlock()
  991. if tryNextAddrFromStart.HasFired() {
  992. break addrLoop
  993. }
  994. }
  995. } else {
  996. // In RequireHandshakeOn mode, we would have already waited for
  997. // the server preface, so we consider this a success and restart
  998. // from the top of the addr list. In RequireHandshakeOff mode,
  999. // we don't care to wait for the server preface before
  1000. // considering this a success, so we also restart from the top
  1001. // of the addr list.
  1002. ac.mu.Lock()
  1003. ac.backoffIdx = 0
  1004. ac.mu.Unlock()
  1005. break addrLoop
  1006. }
  1007. }
  1008. // After exhausting all addresses, or after need to reconnect after a
  1009. // READY, the addrConn enters TRANSIENT_FAILURE.
  1010. ac.mu.Lock()
  1011. if ac.state == connectivity.Shutdown {
  1012. ac.mu.Unlock()
  1013. return
  1014. }
  1015. ac.updateConnectivityState(connectivity.TransientFailure)
  1016. // Backoff.
  1017. b := ac.resetBackoff
  1018. timer := time.NewTimer(backoffFor)
  1019. acctx := ac.ctx
  1020. ac.mu.Unlock()
  1021. select {
  1022. case <-timer.C:
  1023. ac.mu.Lock()
  1024. ac.backoffIdx++
  1025. ac.mu.Unlock()
  1026. case <-b:
  1027. timer.Stop()
  1028. case <-acctx.Done():
  1029. timer.Stop()
  1030. return
  1031. }
  1032. }
  1033. }
  1034. // createTransport creates a connection to one of the backends in addrs. It
  1035. // sets ac.transport in the success case, or it returns an error if it was
  1036. // unable to successfully create a transport.
  1037. //
  1038. // If waitForHandshake is enabled, it blocks until server preface arrives.
  1039. func (ac *addrConn) createTransport(addr resolver.Address, copts transport.ConnectOptions, connectDeadline time.Time, reconnect *grpcsync.Event, prefaceReceived chan struct{}) (transport.ClientTransport, error) {
  1040. onCloseCalled := make(chan struct{})
  1041. target := transport.TargetInfo{
  1042. Addr: addr.Addr,
  1043. Metadata: addr.Metadata,
  1044. Authority: ac.cc.authority,
  1045. }
  1046. prefaceTimer := time.NewTimer(time.Until(connectDeadline))
  1047. onGoAway := func(r transport.GoAwayReason) {
  1048. ac.mu.Lock()
  1049. ac.adjustParams(r)
  1050. ac.mu.Unlock()
  1051. reconnect.Fire()
  1052. }
  1053. onClose := func() {
  1054. close(onCloseCalled)
  1055. prefaceTimer.Stop()
  1056. reconnect.Fire()
  1057. }
  1058. onPrefaceReceipt := func() {
  1059. close(prefaceReceived)
  1060. prefaceTimer.Stop()
  1061. }
  1062. connectCtx, cancel := context.WithDeadline(ac.ctx, connectDeadline)
  1063. defer cancel()
  1064. if channelz.IsOn() {
  1065. copts.ChannelzParentID = ac.channelzID
  1066. }
  1067. newTr, err := transport.NewClientTransport(connectCtx, ac.cc.ctx, target, copts, onPrefaceReceipt, onGoAway, onClose)
  1068. if err == nil {
  1069. if ac.dopts.reqHandshake == envconfig.RequireHandshakeOn {
  1070. select {
  1071. case <-prefaceTimer.C:
  1072. // We didn't get the preface in time.
  1073. newTr.Close()
  1074. err = errors.New("timed out waiting for server handshake")
  1075. case <-prefaceReceived:
  1076. // We got the preface - huzzah! things are good.
  1077. case <-onCloseCalled:
  1078. // The transport has already closed - noop.
  1079. return nil, errors.New("connection closed")
  1080. }
  1081. } else if ac.dopts.reqHandshake == envconfig.RequireHandshakeHybrid {
  1082. go func() {
  1083. select {
  1084. case <-prefaceTimer.C:
  1085. // We didn't get the preface in time.
  1086. newTr.Close()
  1087. case <-prefaceReceived:
  1088. // We got the preface just in the nick of time - huzzah!
  1089. case <-onCloseCalled:
  1090. // The transport has already closed - noop.
  1091. }
  1092. }()
  1093. }
  1094. }
  1095. if err != nil {
  1096. // newTr is either nil, or closed.
  1097. ac.cc.blockingpicker.updateConnectionError(err)
  1098. ac.mu.Lock()
  1099. if ac.state == connectivity.Shutdown {
  1100. // ac.tearDown(...) has been invoked.
  1101. ac.mu.Unlock()
  1102. return nil, errConnClosing
  1103. }
  1104. ac.mu.Unlock()
  1105. grpclog.Warningf("grpc: addrConn.createTransport failed to connect to %v. Err :%v. Reconnecting...", addr, err)
  1106. return nil, err
  1107. }
  1108. // Now there is a viable transport to be use, so set ac.transport to reflect the new viable transport.
  1109. ac.mu.Lock()
  1110. if ac.state == connectivity.Shutdown {
  1111. ac.mu.Unlock()
  1112. newTr.Close()
  1113. return nil, errConnClosing
  1114. }
  1115. ac.mu.Unlock()
  1116. // Now there is a viable transport to be use, so set ac.transport to reflect the new viable transport.
  1117. ac.mu.Lock()
  1118. if ac.state == connectivity.Shutdown {
  1119. ac.mu.Unlock()
  1120. newTr.Close()
  1121. return nil, errConnClosing
  1122. }
  1123. ac.mu.Unlock()
  1124. return newTr, nil
  1125. }
  1126. func (ac *addrConn) startHealthCheck(ctx context.Context, newTr transport.ClientTransport, addr resolver.Address, serviceName string) {
  1127. // Set up the health check helper functions
  1128. newStream := func() (interface{}, error) {
  1129. return ac.newClientStream(ctx, &StreamDesc{ServerStreams: true}, "/grpc.health.v1.Health/Watch", newTr)
  1130. }
  1131. firstReady := true
  1132. reportHealth := func(ok bool) {
  1133. ac.mu.Lock()
  1134. defer ac.mu.Unlock()
  1135. if ac.transport != newTr {
  1136. return
  1137. }
  1138. if ok {
  1139. if firstReady {
  1140. firstReady = false
  1141. ac.curAddr = addr
  1142. }
  1143. ac.updateConnectivityState(connectivity.Ready)
  1144. } else {
  1145. ac.updateConnectivityState(connectivity.TransientFailure)
  1146. }
  1147. }
  1148. err := ac.cc.dopts.healthCheckFunc(ctx, newStream, reportHealth, serviceName)
  1149. if err != nil {
  1150. if status.Code(err) == codes.Unimplemented {
  1151. if channelz.IsOn() {
  1152. channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
  1153. Desc: "Subchannel health check is unimplemented at server side, thus health check is disabled",
  1154. Severity: channelz.CtError,
  1155. })
  1156. }
  1157. grpclog.Error("Subchannel health check is unimplemented at server side, thus health check is disabled")
  1158. } else {
  1159. grpclog.Errorf("HealthCheckFunc exits with unexpected error %v", err)
  1160. }
  1161. }
  1162. }
  1163. func (ac *addrConn) resetConnectBackoff() {
  1164. ac.mu.Lock()
  1165. close(ac.resetBackoff)
  1166. ac.backoffIdx = 0
  1167. ac.resetBackoff = make(chan struct{})
  1168. ac.mu.Unlock()
  1169. }
  1170. // getReadyTransport returns the transport if ac's state is READY.
  1171. // Otherwise it returns nil, false.
  1172. // If ac's state is IDLE, it will trigger ac to connect.
  1173. func (ac *addrConn) getReadyTransport() (transport.ClientTransport, bool) {
  1174. ac.mu.Lock()
  1175. if ac.state == connectivity.Ready && ac.transport != nil {
  1176. t := ac.transport
  1177. ac.mu.Unlock()
  1178. return t, true
  1179. }
  1180. var idle bool
  1181. if ac.state == connectivity.Idle {
  1182. idle = true
  1183. }
  1184. ac.mu.Unlock()
  1185. // Trigger idle ac to connect.
  1186. if idle {
  1187. ac.connect()
  1188. }
  1189. return nil, false
  1190. }
  1191. // tearDown starts to tear down the addrConn.
  1192. // TODO(zhaoq): Make this synchronous to avoid unbounded memory consumption in
  1193. // some edge cases (e.g., the caller opens and closes many addrConn's in a
  1194. // tight loop.
  1195. // tearDown doesn't remove ac from ac.cc.conns.
  1196. func (ac *addrConn) tearDown(err error) {
  1197. ac.mu.Lock()
  1198. if ac.state == connectivity.Shutdown {
  1199. ac.mu.Unlock()
  1200. return
  1201. }
  1202. curTr := ac.transport
  1203. ac.transport = nil
  1204. // We have to set the state to Shutdown before anything else to prevent races
  1205. // between setting the state and logic that waits on context cancelation / etc.
  1206. ac.updateConnectivityState(connectivity.Shutdown)
  1207. ac.cancel()
  1208. ac.tearDownErr = err
  1209. ac.curAddr = resolver.Address{}
  1210. if err == errConnDrain && curTr != nil {
  1211. // GracefulClose(...) may be executed multiple times when
  1212. // i) receiving multiple GoAway frames from the server; or
  1213. // ii) there are concurrent name resolver/Balancer triggered
  1214. // address removal and GoAway.
  1215. // We have to unlock and re-lock here because GracefulClose => Close => onClose, which requires locking ac.mu.
  1216. ac.mu.Unlock()
  1217. curTr.GracefulClose()
  1218. ac.mu.Lock()
  1219. }
  1220. if channelz.IsOn() {
  1221. channelz.AddTraceEvent(ac.channelzID, &channelz.TraceEventDesc{
  1222. Desc: "Subchannel Deleted",
  1223. Severity: channelz.CtINFO,
  1224. Parent: &channelz.TraceEventDesc{
  1225. Desc: fmt.Sprintf("Subchanel(id:%d) deleted", ac.channelzID),
  1226. Severity: channelz.CtINFO,
  1227. },
  1228. })
  1229. // TraceEvent needs to be called before RemoveEntry, as TraceEvent may add trace reference to
  1230. // the entity beng deleted, and thus prevent it from being deleted right away.
  1231. channelz.RemoveEntry(ac.channelzID)
  1232. }
  1233. ac.mu.Unlock()
  1234. }
  1235. func (ac *addrConn) getState() connectivity.State {
  1236. ac.mu.Lock()
  1237. defer ac.mu.Unlock()
  1238. return ac.state
  1239. }
  1240. func (ac *addrConn) ChannelzMetric() *channelz.ChannelInternalMetric {
  1241. ac.mu.Lock()
  1242. addr := ac.curAddr.Addr
  1243. ac.mu.Unlock()
  1244. return &channelz.ChannelInternalMetric{
  1245. State: ac.getState(),
  1246. Target: addr,
  1247. CallsStarted: atomic.LoadInt64(&ac.czData.callsStarted),
  1248. CallsSucceeded: atomic.LoadInt64(&ac.czData.callsSucceeded),
  1249. CallsFailed: atomic.LoadInt64(&ac.czData.callsFailed),
  1250. LastCallStartedTimestamp: time.Unix(0, atomic.LoadInt64(&ac.czData.lastCallStartedTime)),
  1251. }
  1252. }
  1253. func (ac *addrConn) incrCallsStarted() {
  1254. atomic.AddInt64(&ac.czData.callsStarted, 1)
  1255. atomic.StoreInt64(&ac.czData.lastCallStartedTime, time.Now().UnixNano())
  1256. }
  1257. func (ac *addrConn) incrCallsSucceeded() {
  1258. atomic.AddInt64(&ac.czData.callsSucceeded, 1)
  1259. }
  1260. func (ac *addrConn) incrCallsFailed() {
  1261. atomic.AddInt64(&ac.czData.callsFailed, 1)
  1262. }
  // retryThrottler is a token bucket that decides whether retries are
  // currently allowed. A nil *retryThrottler is valid and never throttles.
  type retryThrottler struct {
  	max    float64 // Upper bound for tokens.
  	thresh float64 // Retries are throttled while tokens <= thresh.
  	ratio  float64 // Amount added to tokens by each successful RPC.

  	mu     sync.Mutex
  	tokens float64 // TODO(dfawley): replace with atomic and remove lock.
  }

  // throttle subtracts a retry token from the pool and returns whether a retry
  // should be throttled (disallowed) based upon the retry throttling policy in
  // the service config. The token count never drops below zero.
  func (rt *retryThrottler) throttle() bool {
  	if rt == nil {
  		return false
  	}
  	rt.mu.Lock()
  	remaining := rt.tokens - 1
  	if remaining < 0 {
  		remaining = 0
  	}
  	rt.tokens = remaining
  	throttled := remaining <= rt.thresh
  	rt.mu.Unlock()
  	return throttled
  }

  // successfulRPC credits the pool with ratio tokens, capping the total at max.
  func (rt *retryThrottler) successfulRPC() {
  	if rt == nil {
  		return
  	}
  	rt.mu.Lock()
  	refilled := rt.tokens + rt.ratio
  	if refilled > rt.max {
  		refilled = rt.max
  	}
  	rt.tokens = refilled
  	rt.mu.Unlock()
  }
  1296. type channelzChannel struct {
  1297. cc *ClientConn
  1298. }
  1299. func (c *channelzChannel) ChannelzMetric() *channelz.ChannelInternalMetric {
  1300. return c.cc.channelzMetric()
  1301. }
  // ErrClientConnTimeout indicates that the ClientConn could not establish the
  // underlying connections within the specified timeout.
  //
  // Deprecated: This error is never returned by grpc and should not be
  // referenced by users.
  var ErrClientConnTimeout = errors.New("grpc: timed out when dialing")