You cannot select more than 25 topics. Topics must start with a letter or number, can include dashes ('-'), and can be up to 35 characters long.

264 lines
6.8 KiB

  1. package git
  2. import (
  3. "io"
  4. "sort"
  5. "gopkg.in/src-d/go-git.v4/plumbing"
  6. "gopkg.in/src-d/go-git.v4/plumbing/object"
  7. "gopkg.in/src-d/go-git.v4/utils/diff"
  8. "github.com/sergi/go-diff/diffmatchpatch"
  9. )
  10. // References returns a slice of Commits for the file at "path", starting from
  11. // the commit provided that contains the file from the provided path. The last
  12. // commit into the returned slice is the commit where the file was created.
  13. // If the provided commit does not contains the specified path, a nil slice is
  14. // returned. The commits are sorted in commit order, newer to older.
  15. //
  16. // Caveats:
  17. //
  18. // - Moves and copies are not currently supported.
  19. //
  20. // - Cherry-picks are not detected unless there are no commits between them and
  21. // therefore can appear repeated in the list. (see git path-id for hints on how
  22. // to fix this).
  23. func references(c *object.Commit, path string) ([]*object.Commit, error) {
  24. var result []*object.Commit
  25. seen := make(map[plumbing.Hash]struct{})
  26. if err := walkGraph(&result, &seen, c, path); err != nil {
  27. return nil, err
  28. }
  29. // TODO result should be returned without ordering
  30. sortCommits(result)
  31. // for merges of identical cherry-picks
  32. return removeComp(path, result, equivalent)
  33. }
  34. type commitSorterer struct {
  35. l []*object.Commit
  36. }
  37. func (s commitSorterer) Len() int {
  38. return len(s.l)
  39. }
  40. func (s commitSorterer) Less(i, j int) bool {
  41. return s.l[i].Committer.When.Before(s.l[j].Committer.When) ||
  42. s.l[i].Committer.When.Equal(s.l[j].Committer.When) &&
  43. s.l[i].Author.When.Before(s.l[j].Author.When)
  44. }
  45. func (s commitSorterer) Swap(i, j int) {
  46. s.l[i], s.l[j] = s.l[j], s.l[i]
  47. }
  48. // SortCommits sorts a commit list by commit date, from older to newer.
  49. func sortCommits(l []*object.Commit) {
  50. s := &commitSorterer{l}
  51. sort.Sort(s)
  52. }
  53. // Recursive traversal of the commit graph, generating a linear history of the
  54. // path.
  55. func walkGraph(result *[]*object.Commit, seen *map[plumbing.Hash]struct{}, current *object.Commit, path string) error {
  56. // check and update seen
  57. if _, ok := (*seen)[current.Hash]; ok {
  58. return nil
  59. }
  60. (*seen)[current.Hash] = struct{}{}
  61. // if the path is not in the current commit, stop searching.
  62. if _, err := current.File(path); err != nil {
  63. return nil
  64. }
  65. // optimization: don't traverse branches that does not
  66. // contain the path.
  67. parents, err := parentsContainingPath(path, current)
  68. if err != nil {
  69. return err
  70. }
  71. switch len(parents) {
  72. // if the path is not found in any of its parents, the path was
  73. // created by this commit; we must add it to the revisions list and
  74. // stop searching. This includes the case when current is the
  75. // initial commit.
  76. case 0:
  77. *result = append(*result, current)
  78. return nil
  79. case 1: // only one parent contains the path
  80. // if the file contents has change, add the current commit
  81. different, err := differentContents(path, current, parents)
  82. if err != nil {
  83. return err
  84. }
  85. if len(different) == 1 {
  86. *result = append(*result, current)
  87. }
  88. // in any case, walk the parent
  89. return walkGraph(result, seen, parents[0], path)
  90. default: // more than one parent contains the path
  91. // TODO: detect merges that had a conflict, because they must be
  92. // included in the result here.
  93. for _, p := range parents {
  94. err := walkGraph(result, seen, p, path)
  95. if err != nil {
  96. return err
  97. }
  98. }
  99. }
  100. return nil
  101. }
  102. func parentsContainingPath(path string, c *object.Commit) ([]*object.Commit, error) {
  103. // TODO: benchmark this method making git.object.Commit.parent public instead of using
  104. // an iterator
  105. var result []*object.Commit
  106. iter := c.Parents()
  107. for {
  108. parent, err := iter.Next()
  109. if err == io.EOF {
  110. return result, nil
  111. }
  112. if err != nil {
  113. return nil, err
  114. }
  115. if _, err := parent.File(path); err == nil {
  116. result = append(result, parent)
  117. }
  118. }
  119. }
  120. // Returns an slice of the commits in "cs" that has the file "path", but with different
  121. // contents than what can be found in "c".
  122. func differentContents(path string, c *object.Commit, cs []*object.Commit) ([]*object.Commit, error) {
  123. result := make([]*object.Commit, 0, len(cs))
  124. h, found := blobHash(path, c)
  125. if !found {
  126. return nil, object.ErrFileNotFound
  127. }
  128. for _, cx := range cs {
  129. if hx, found := blobHash(path, cx); found && h != hx {
  130. result = append(result, cx)
  131. }
  132. }
  133. return result, nil
  134. }
  135. // blobHash returns the hash of a path in a commit
  136. func blobHash(path string, commit *object.Commit) (hash plumbing.Hash, found bool) {
  137. file, err := commit.File(path)
  138. if err != nil {
  139. var empty plumbing.Hash
  140. return empty, found
  141. }
  142. return file.Hash, true
  143. }
  144. type contentsComparatorFn func(path string, a, b *object.Commit) (bool, error)
  145. // Returns a new slice of commits, with duplicates removed. Expects a
  146. // sorted commit list. Duplication is defined according to "comp". It
  147. // will always keep the first commit of a series of duplicated commits.
  148. func removeComp(path string, cs []*object.Commit, comp contentsComparatorFn) ([]*object.Commit, error) {
  149. result := make([]*object.Commit, 0, len(cs))
  150. if len(cs) == 0 {
  151. return result, nil
  152. }
  153. result = append(result, cs[0])
  154. for i := 1; i < len(cs); i++ {
  155. equals, err := comp(path, cs[i], cs[i-1])
  156. if err != nil {
  157. return nil, err
  158. }
  159. if !equals {
  160. result = append(result, cs[i])
  161. }
  162. }
  163. return result, nil
  164. }
  165. // Equivalent commits are commits whose patch is the same.
  166. func equivalent(path string, a, b *object.Commit) (bool, error) {
  167. numParentsA := a.NumParents()
  168. numParentsB := b.NumParents()
  169. // the first commit is not equivalent to anyone
  170. // and "I think" merges can not be equivalent to anything
  171. if numParentsA != 1 || numParentsB != 1 {
  172. return false, nil
  173. }
  174. diffsA, err := patch(a, path)
  175. if err != nil {
  176. return false, err
  177. }
  178. diffsB, err := patch(b, path)
  179. if err != nil {
  180. return false, err
  181. }
  182. return sameDiffs(diffsA, diffsB), nil
  183. }
  184. func patch(c *object.Commit, path string) ([]diffmatchpatch.Diff, error) {
  185. // get contents of the file in the commit
  186. file, err := c.File(path)
  187. if err != nil {
  188. return nil, err
  189. }
  190. content, err := file.Contents()
  191. if err != nil {
  192. return nil, err
  193. }
  194. // get contents of the file in the first parent of the commit
  195. var contentParent string
  196. iter := c.Parents()
  197. parent, err := iter.Next()
  198. if err != nil {
  199. return nil, err
  200. }
  201. file, err = parent.File(path)
  202. if err != nil {
  203. contentParent = ""
  204. } else {
  205. contentParent, err = file.Contents()
  206. if err != nil {
  207. return nil, err
  208. }
  209. }
  210. // compare the contents of parent and child
  211. return diff.Do(content, contentParent), nil
  212. }
  213. func sameDiffs(a, b []diffmatchpatch.Diff) bool {
  214. if len(a) != len(b) {
  215. return false
  216. }
  217. for i := range a {
  218. if !sameDiff(a[i], b[i]) {
  219. return false
  220. }
  221. }
  222. return true
  223. }
  224. func sameDiff(a, b diffmatchpatch.Diff) bool {
  225. if a.Type != b.Type {
  226. return false
  227. }
  228. switch a.Type {
  229. case 0:
  230. return countLines(a.Text) == countLines(b.Text)
  231. case 1, -1:
  232. return a.Text == b.Text
  233. default:
  234. panic("unreachable")
  235. }
  236. }