module.go 17 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540
  1. // Copyright 2018 The Go Authors. All rights reserved.
  2. // Use of this source code is governed by a BSD-style
  3. // license that can be found in the LICENSE file.
  4. // Package module defines the module.Version type
  5. // along with support code.
  6. package module
  7. // IMPORTANT NOTE
  8. //
  9. // This file essentially defines the set of valid import paths for the go command.
  10. // There are many subtle considerations, including Unicode ambiguity,
  11. // security, network, and file system representations.
  12. //
  13. // This file also defines the set of valid module path and version combinations,
  14. // another topic with many subtle considerations.
  15. //
  16. // Changes to the semantics in this file require approval from rsc.
  17. import (
  18. "fmt"
  19. "sort"
  20. "strings"
  21. "unicode"
  22. "unicode/utf8"
  23. "golang.org/x/tools/internal/semver"
  24. )
// A Version is defined by a module path and version pair.
type Version struct {
	// Path is the module path half of the pair.
	Path string

	// Version is usually a semantic version in canonical form.
	// There are two exceptions to this general rule.
	// First, the top-level target of a build has no specific version
	// and uses Version = "".
	// Second, during MVS calculations the version "none" is used
	// to represent the decision to take no version of a given module.
	//
	// The JSON tag omits the field from encoded output when it is empty.
	Version string `json:",omitempty"`
}
  36. // Check checks that a given module path, version pair is valid.
  37. // In addition to the path being a valid module path
  38. // and the version being a valid semantic version,
  39. // the two must correspond.
  40. // For example, the path "yaml/v2" only corresponds to
  41. // semantic versions beginning with "v2.".
  42. func Check(path, version string) error {
  43. if err := CheckPath(path); err != nil {
  44. return err
  45. }
  46. if !semver.IsValid(version) {
  47. return fmt.Errorf("malformed semantic version %v", version)
  48. }
  49. _, pathMajor, _ := SplitPathVersion(path)
  50. if !MatchPathMajor(version, pathMajor) {
  51. if pathMajor == "" {
  52. pathMajor = "v0 or v1"
  53. }
  54. if pathMajor[0] == '.' { // .v1
  55. pathMajor = pathMajor[1:]
  56. }
  57. return fmt.Errorf("mismatched module path %v and version %v (want %v)", path, version, pathMajor)
  58. }
  59. return nil
  60. }
  61. // firstPathOK reports whether r can appear in the first element of a module path.
  62. // The first element of the path must be an LDH domain name, at least for now.
  63. // To avoid case ambiguity, the domain name must be entirely lower case.
  64. func firstPathOK(r rune) bool {
  65. return r == '-' || r == '.' ||
  66. '0' <= r && r <= '9' ||
  67. 'a' <= r && r <= 'z'
  68. }
  69. // pathOK reports whether r can appear in an import path element.
  70. // Paths can be ASCII letters, ASCII digits, and limited ASCII punctuation: + - . _ and ~.
  71. // This matches what "go get" has historically recognized in import paths.
  72. // TODO(rsc): We would like to allow Unicode letters, but that requires additional
  73. // care in the safe encoding (see note below).
  74. func pathOK(r rune) bool {
  75. if r < utf8.RuneSelf {
  76. return r == '+' || r == '-' || r == '.' || r == '_' || r == '~' ||
  77. '0' <= r && r <= '9' ||
  78. 'A' <= r && r <= 'Z' ||
  79. 'a' <= r && r <= 'z'
  80. }
  81. return false
  82. }
  83. // fileNameOK reports whether r can appear in a file name.
  84. // For now we allow all Unicode letters but otherwise limit to pathOK plus a few more punctuation characters.
  85. // If we expand the set of allowed characters here, we have to
  86. // work harder at detecting potential case-folding and normalization collisions.
  87. // See note about "safe encoding" below.
  88. func fileNameOK(r rune) bool {
  89. if r < utf8.RuneSelf {
  90. // Entire set of ASCII punctuation, from which we remove characters:
  91. // ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~
  92. // We disallow some shell special characters: " ' * < > ? ` |
  93. // (Note that some of those are disallowed by the Windows file system as well.)
  94. // We also disallow path separators / : and \ (fileNameOK is only called on path element characters).
  95. // We allow spaces (U+0020) in file names.
  96. const allowed = "!#$%&()+,-.=@[]^_{}~ "
  97. if '0' <= r && r <= '9' || 'A' <= r && r <= 'Z' || 'a' <= r && r <= 'z' {
  98. return true
  99. }
  100. for i := 0; i < len(allowed); i++ {
  101. if rune(allowed[i]) == r {
  102. return true
  103. }
  104. }
  105. return false
  106. }
  107. // It may be OK to add more ASCII punctuation here, but only carefully.
  108. // For example Windows disallows < > \, and macOS disallows :, so we must not allow those.
  109. return unicode.IsLetter(r)
  110. }
  111. // CheckPath checks that a module path is valid.
  112. func CheckPath(path string) error {
  113. if err := checkPath(path, false); err != nil {
  114. return fmt.Errorf("malformed module path %q: %v", path, err)
  115. }
  116. i := strings.Index(path, "/")
  117. if i < 0 {
  118. i = len(path)
  119. }
  120. if i == 0 {
  121. return fmt.Errorf("malformed module path %q: leading slash", path)
  122. }
  123. if !strings.Contains(path[:i], ".") {
  124. return fmt.Errorf("malformed module path %q: missing dot in first path element", path)
  125. }
  126. if path[0] == '-' {
  127. return fmt.Errorf("malformed module path %q: leading dash in first path element", path)
  128. }
  129. for _, r := range path[:i] {
  130. if !firstPathOK(r) {
  131. return fmt.Errorf("malformed module path %q: invalid char %q in first path element", path, r)
  132. }
  133. }
  134. if _, _, ok := SplitPathVersion(path); !ok {
  135. return fmt.Errorf("malformed module path %q: invalid version", path)
  136. }
  137. return nil
  138. }
  139. // CheckImportPath checks that an import path is valid.
  140. func CheckImportPath(path string) error {
  141. if err := checkPath(path, false); err != nil {
  142. return fmt.Errorf("malformed import path %q: %v", path, err)
  143. }
  144. return nil
  145. }
  146. // checkPath checks that a general path is valid.
  147. // It returns an error describing why but not mentioning path.
  148. // Because these checks apply to both module paths and import paths,
  149. // the caller is expected to add the "malformed ___ path %q: " prefix.
  150. // fileName indicates whether the final element of the path is a file name
  151. // (as opposed to a directory name).
  152. func checkPath(path string, fileName bool) error {
  153. if !utf8.ValidString(path) {
  154. return fmt.Errorf("invalid UTF-8")
  155. }
  156. if path == "" {
  157. return fmt.Errorf("empty string")
  158. }
  159. if strings.Contains(path, "..") {
  160. return fmt.Errorf("double dot")
  161. }
  162. if strings.Contains(path, "//") {
  163. return fmt.Errorf("double slash")
  164. }
  165. if path[len(path)-1] == '/' {
  166. return fmt.Errorf("trailing slash")
  167. }
  168. elemStart := 0
  169. for i, r := range path {
  170. if r == '/' {
  171. if err := checkElem(path[elemStart:i], fileName); err != nil {
  172. return err
  173. }
  174. elemStart = i + 1
  175. }
  176. }
  177. if err := checkElem(path[elemStart:], fileName); err != nil {
  178. return err
  179. }
  180. return nil
  181. }
  182. // checkElem checks whether an individual path element is valid.
  183. // fileName indicates whether the element is a file name (not a directory name).
  184. func checkElem(elem string, fileName bool) error {
  185. if elem == "" {
  186. return fmt.Errorf("empty path element")
  187. }
  188. if strings.Count(elem, ".") == len(elem) {
  189. return fmt.Errorf("invalid path element %q", elem)
  190. }
  191. if elem[0] == '.' && !fileName {
  192. return fmt.Errorf("leading dot in path element")
  193. }
  194. if elem[len(elem)-1] == '.' {
  195. return fmt.Errorf("trailing dot in path element")
  196. }
  197. charOK := pathOK
  198. if fileName {
  199. charOK = fileNameOK
  200. }
  201. for _, r := range elem {
  202. if !charOK(r) {
  203. return fmt.Errorf("invalid char %q", r)
  204. }
  205. }
  206. // Windows disallows a bunch of path elements, sadly.
  207. // See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file
  208. short := elem
  209. if i := strings.Index(short, "."); i >= 0 {
  210. short = short[:i]
  211. }
  212. for _, bad := range badWindowsNames {
  213. if strings.EqualFold(bad, short) {
  214. return fmt.Errorf("disallowed path element %q", elem)
  215. }
  216. }
  217. return nil
  218. }
  219. // CheckFilePath checks whether a slash-separated file path is valid.
  220. func CheckFilePath(path string) error {
  221. if err := checkPath(path, true); err != nil {
  222. return fmt.Errorf("malformed file path %q: %v", path, err)
  223. }
  224. return nil
  225. }
  226. // badWindowsNames are the reserved file path elements on Windows.
  227. // See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file
  228. var badWindowsNames = []string{
  229. "CON",
  230. "PRN",
  231. "AUX",
  232. "NUL",
  233. "COM1",
  234. "COM2",
  235. "COM3",
  236. "COM4",
  237. "COM5",
  238. "COM6",
  239. "COM7",
  240. "COM8",
  241. "COM9",
  242. "LPT1",
  243. "LPT2",
  244. "LPT3",
  245. "LPT4",
  246. "LPT5",
  247. "LPT6",
  248. "LPT7",
  249. "LPT8",
  250. "LPT9",
  251. }
  252. // SplitPathVersion returns prefix and major version such that prefix+pathMajor == path
  253. // and version is either empty or "/vN" for N >= 2.
  254. // As a special case, gopkg.in paths are recognized directly;
  255. // they require ".vN" instead of "/vN", and for all N, not just N >= 2.
  256. func SplitPathVersion(path string) (prefix, pathMajor string, ok bool) {
  257. if strings.HasPrefix(path, "gopkg.in/") {
  258. return splitGopkgIn(path)
  259. }
  260. i := len(path)
  261. dot := false
  262. for i > 0 && ('0' <= path[i-1] && path[i-1] <= '9' || path[i-1] == '.') {
  263. if path[i-1] == '.' {
  264. dot = true
  265. }
  266. i--
  267. }
  268. if i <= 1 || i == len(path) || path[i-1] != 'v' || path[i-2] != '/' {
  269. return path, "", true
  270. }
  271. prefix, pathMajor = path[:i-2], path[i-2:]
  272. if dot || len(pathMajor) <= 2 || pathMajor[2] == '0' || pathMajor == "/v1" {
  273. return path, "", false
  274. }
  275. return prefix, pathMajor, true
  276. }
  277. // splitGopkgIn is like SplitPathVersion but only for gopkg.in paths.
  278. func splitGopkgIn(path string) (prefix, pathMajor string, ok bool) {
  279. if !strings.HasPrefix(path, "gopkg.in/") {
  280. return path, "", false
  281. }
  282. i := len(path)
  283. if strings.HasSuffix(path, "-unstable") {
  284. i -= len("-unstable")
  285. }
  286. for i > 0 && ('0' <= path[i-1] && path[i-1] <= '9') {
  287. i--
  288. }
  289. if i <= 1 || path[i-1] != 'v' || path[i-2] != '.' {
  290. // All gopkg.in paths must end in vN for some N.
  291. return path, "", false
  292. }
  293. prefix, pathMajor = path[:i-2], path[i-2:]
  294. if len(pathMajor) <= 2 || pathMajor[2] == '0' && pathMajor != ".v0" {
  295. return path, "", false
  296. }
  297. return prefix, pathMajor, true
  298. }
  299. // MatchPathMajor reports whether the semantic version v
  300. // matches the path major version pathMajor.
  301. func MatchPathMajor(v, pathMajor string) bool {
  302. if strings.HasPrefix(pathMajor, ".v") && strings.HasSuffix(pathMajor, "-unstable") {
  303. pathMajor = strings.TrimSuffix(pathMajor, "-unstable")
  304. }
  305. if strings.HasPrefix(v, "v0.0.0-") && pathMajor == ".v1" {
  306. // Allow old bug in pseudo-versions that generated v0.0.0- pseudoversion for gopkg .v1.
  307. // For example, gopkg.in/yaml.v2@v2.2.1's go.mod requires gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405.
  308. return true
  309. }
  310. m := semver.Major(v)
  311. if pathMajor == "" {
  312. return m == "v0" || m == "v1" || semver.Build(v) == "+incompatible"
  313. }
  314. return (pathMajor[0] == '/' || pathMajor[0] == '.') && m == pathMajor[1:]
  315. }
  316. // CanonicalVersion returns the canonical form of the version string v.
  317. // It is the same as semver.Canonical(v) except that it preserves the special build suffix "+incompatible".
  318. func CanonicalVersion(v string) string {
  319. cv := semver.Canonical(v)
  320. if semver.Build(v) == "+incompatible" {
  321. cv += "+incompatible"
  322. }
  323. return cv
  324. }
  325. // Sort sorts the list by Path, breaking ties by comparing Versions.
  326. func Sort(list []Version) {
  327. sort.Slice(list, func(i, j int) bool {
  328. mi := list[i]
  329. mj := list[j]
  330. if mi.Path != mj.Path {
  331. return mi.Path < mj.Path
  332. }
  333. // To help go.sum formatting, allow version/file.
  334. // Compare semver prefix by semver rules,
  335. // file by string order.
  336. vi := mi.Version
  337. vj := mj.Version
  338. var fi, fj string
  339. if k := strings.Index(vi, "/"); k >= 0 {
  340. vi, fi = vi[:k], vi[k:]
  341. }
  342. if k := strings.Index(vj, "/"); k >= 0 {
  343. vj, fj = vj[:k], vj[k:]
  344. }
  345. if vi != vj {
  346. return semver.Compare(vi, vj) < 0
  347. }
  348. return fi < fj
  349. })
  350. }
  351. // Safe encodings
  352. //
  353. // Module paths appear as substrings of file system paths
  354. // (in the download cache) and of web server URLs in the proxy protocol.
  355. // In general we cannot rely on file systems to be case-sensitive,
  356. // nor can we rely on web servers, since they read from file systems.
  357. // That is, we cannot rely on the file system to keep rsc.io/QUOTE
  358. // and rsc.io/quote separate. Windows and macOS don't.
  359. // Instead, we must never require two different casings of a file path.
  360. // Because we want the download cache to match the proxy protocol,
  361. // and because we want the proxy protocol to be possible to serve
  362. // from a tree of static files (which might be stored on a case-insensitive
  363. // file system), the proxy protocol must never require two different casings
  364. // of a URL path either.
  365. //
  366. // One possibility would be to make the safe encoding be the lowercase
  367. // hexadecimal encoding of the actual path bytes. This would avoid ever
  368. // needing different casings of a file path, but it would be fairly illegible
  369. // to most programmers when those paths appeared in the file system
  370. // (including in file paths in compiler errors and stack traces)
  371. // in web server logs, and so on. Instead, we want a safe encoding that
  372. // leaves most paths unaltered.
  373. //
  374. // The safe encoding is this:
  375. // replace every uppercase letter with an exclamation mark
  376. // followed by the letter's lowercase equivalent.
  377. //
  378. // For example,
  379. // github.com/Azure/azure-sdk-for-go -> github.com/!azure/azure-sdk-for-go.
  380. // github.com/GoogleCloudPlatform/cloudsql-proxy -> github.com/!google!cloud!platform/cloudsql-proxy
  381. // github.com/Sirupsen/logrus -> github.com/!sirupsen/logrus.
  382. //
  383. // Import paths that avoid upper-case letters are left unchanged.
  384. // Note that because import paths are ASCII-only and avoid various
  385. // problematic punctuation (like : < and >), the safe encoding is also ASCII-only
  386. // and avoids the same problematic punctuation.
  387. //
  388. // Import paths have never allowed exclamation marks, so there is no
  389. // need to define how to encode a literal !.
  390. //
  391. // Although paths are disallowed from using Unicode (see pathOK above),
  392. // the eventual plan is to allow Unicode letters as well, to assume that
  393. // file systems and URLs are Unicode-safe (storing UTF-8), and apply
  394. // the !-for-uppercase convention. Note however that not all runes that
  395. // are different but case-fold equivalent are an upper/lower pair.
  396. // For example, U+004B ('K'), U+006B ('k'), and U+212A ('K' for Kelvin)
  397. // are considered to case-fold to each other. When we do add Unicode
  398. // letters, we must not assume that upper/lower are the only case-equivalent pairs.
  399. // Perhaps the Kelvin symbol would be disallowed entirely, for example.
  400. // Or perhaps it would encode as "!!k", or perhaps as "(212A)".
  401. //
  402. // Also, it would be nice to allow Unicode marks as well as letters,
  403. // but marks include combining marks, and then we must deal not
  404. // only with case folding but also normalization: both U+00E9 ('é')
  405. // and U+0065 U+0301 ('e' followed by combining acute accent)
  406. // look the same on the page and are treated by some file systems
  407. // as the same path. If we do allow Unicode marks in paths, there
  408. // must be some kind of normalization to allow only one canonical
  409. // encoding of any character used in an import path.
  410. // EncodePath returns the safe encoding of the given module path.
  411. // It fails if the module path is invalid.
  412. func EncodePath(path string) (encoding string, err error) {
  413. if err := CheckPath(path); err != nil {
  414. return "", err
  415. }
  416. return encodeString(path)
  417. }
  418. // EncodeVersion returns the safe encoding of the given module version.
  419. // Versions are allowed to be in non-semver form but must be valid file names
  420. // and not contain exclamation marks.
  421. func EncodeVersion(v string) (encoding string, err error) {
  422. if err := checkElem(v, true); err != nil || strings.Contains(v, "!") {
  423. return "", fmt.Errorf("disallowed version string %q", v)
  424. }
  425. return encodeString(v)
  426. }
  427. func encodeString(s string) (encoding string, err error) {
  428. haveUpper := false
  429. for _, r := range s {
  430. if r == '!' || r >= utf8.RuneSelf {
  431. // This should be disallowed by CheckPath, but diagnose anyway.
  432. // The correctness of the encoding loop below depends on it.
  433. return "", fmt.Errorf("internal error: inconsistency in EncodePath")
  434. }
  435. if 'A' <= r && r <= 'Z' {
  436. haveUpper = true
  437. }
  438. }
  439. if !haveUpper {
  440. return s, nil
  441. }
  442. var buf []byte
  443. for _, r := range s {
  444. if 'A' <= r && r <= 'Z' {
  445. buf = append(buf, '!', byte(r+'a'-'A'))
  446. } else {
  447. buf = append(buf, byte(r))
  448. }
  449. }
  450. return string(buf), nil
  451. }
  452. // DecodePath returns the module path of the given safe encoding.
  453. // It fails if the encoding is invalid or encodes an invalid path.
  454. func DecodePath(encoding string) (path string, err error) {
  455. path, ok := decodeString(encoding)
  456. if !ok {
  457. return "", fmt.Errorf("invalid module path encoding %q", encoding)
  458. }
  459. if err := CheckPath(path); err != nil {
  460. return "", fmt.Errorf("invalid module path encoding %q: %v", encoding, err)
  461. }
  462. return path, nil
  463. }
  464. // DecodeVersion returns the version string for the given safe encoding.
  465. // It fails if the encoding is invalid or encodes an invalid version.
  466. // Versions are allowed to be in non-semver form but must be valid file names
  467. // and not contain exclamation marks.
  468. func DecodeVersion(encoding string) (v string, err error) {
  469. v, ok := decodeString(encoding)
  470. if !ok {
  471. return "", fmt.Errorf("invalid version encoding %q", encoding)
  472. }
  473. if err := checkElem(v, true); err != nil {
  474. return "", fmt.Errorf("disallowed version string %q", v)
  475. }
  476. return v, nil
  477. }
  478. func decodeString(encoding string) (string, bool) {
  479. var buf []byte
  480. bang := false
  481. for _, r := range encoding {
  482. if r >= utf8.RuneSelf {
  483. return "", false
  484. }
  485. if bang {
  486. bang = false
  487. if r < 'a' || 'z' < r {
  488. return "", false
  489. }
  490. buf = append(buf, byte(r+'A'-'a'))
  491. continue
  492. }
  493. if r == '!' {
  494. bang = true
  495. continue
  496. }
  497. if 'A' <= r && r <= 'Z' {
  498. return "", false
  499. }
  500. buf = append(buf, byte(r))
  501. }
  502. if bang {
  503. return "", false
  504. }
  505. return string(buf), true
  506. }