// group_filter.go

package main
  2. // 数据复杂分组测试
  3. func main() {
  4. // 分组策略列表
  5. policies := []GroupPolicy{
  6. {Area, Group},
  7. {Company, Hash},
  8. {Title, Group},
  9. }
  10. // 待分组数据记录列表
  11. items := []RecordItem{
  12. {Area: "", Company: "", Title: "", IsLeader: ""},
  13. {Area: "", Company: "", Title: "", IsLeader: ""},
  14. }
  15. // 根级,外面再加一层是为了适配下面的循环处理
  16. rootClusters := FilterClusters{
  17. {Seq: 0, Items: items},
  18. }
  19. // 循环变量
  20. clusters := rootClusters
  21. for _, policy := range policies {
  22. // 依次按照分组策略处理
  23. clusters = clusters.filter(policy)
  24. if nil == clusters || len(clusters) == 0 {
  25. return
  26. }
  27. }
  28. if len(clusters) == 0 {
  29. return
  30. }
  31. // 获得平铺的叶子节点
  32. var results []RecordItem
  33. for _, cluster := range clusters {
  34. if nil == cluster.Items || len(cluster.Items) == 0 {
  35. continue
  36. }
  37. results = append(results, cluster.Items...)
  38. }
  39. if nil == results || len(results) == 0 {
  40. return
  41. }
  42. // 求箱子数量
  43. length := len(results)
  44. groupSize := 50
  45. groupCount := length / groupSize
  46. if length%groupSize > 0 {
  47. groupCount++
  48. }
  49. // TODO 最后把results塞进箱子里就是
  50. }
  51. type RecordItem map[string]string // 数据记录
  52. // 聚类,多叉树形结构
  53. type FilterCluster struct {
  54. Seq int // 序号
  55. Items []RecordItem // 组内记录列表
  56. SubClusters FilterClusters // 子级聚类集合
  57. }
  58. type FilterClusters []FilterCluster // 聚类集合
  59. // 分组策略字段
  60. const (
  61. Area = "area"
  62. Company = "company"
  63. Title = "title"
  64. IsLeader = "isLeader"
  65. )
  66. // 分组策略类型
  67. type FilterType int8
  68. const (
  69. Random FilterType = iota // 随机
  70. Group // 聚合
  71. Hash // 散列
  72. )
  73. // 分组策略
  74. type GroupPolicy struct {
  75. Name string
  76. Type FilterType
  77. }
  78. // 策略模式,分支处理,返回当前聚类集合再进一步处理的子级聚类集合
  79. func (clusters *FilterClusters) filter(policy GroupPolicy) FilterClusters {
  80. if policy.Type == Random {
  81. return clusters.mapping(policy.Name, random)
  82. } else if policy.Type == Group {
  83. return clusters.mapping(policy.Name, group)
  84. } else if policy.Type == Hash {
  85. return clusters.mapping(policy.Name, hash)
  86. }
  87. return nil
  88. }
  89. // 将子级聚类集合映射关联到父级,如此,虽然仅返回子级聚类集合,但通过根级对象可以遍历整棵树
  90. func (clusters *FilterClusters) mapping(name string, fn func(string, FilterCluster) FilterClusters) FilterClusters {
  91. var allSubClusters FilterClusters
  92. for i := 0; i < len(*clusters); i++ {
  93. subClusters := fn(name, (*clusters)[i])
  94. if nil == subClusters || len(subClusters) == 0 {
  95. continue
  96. }
  97. (*clusters)[i].SubClusters = subClusters
  98. allSubClusters = append(allSubClusters, subClusters...)
  99. }
  100. if nil == allSubClusters || len(allSubClusters) == 0 {
  101. return nil
  102. }
  103. return allSubClusters
  104. }
  105. // TODO 随机算法实现
  106. func random(name string, cluster FilterCluster) FilterClusters {
  107. // 建议子级聚类数量 = 当前聚类中数据记录内,指定name字段的取值数
  108. // 记录随机排列即可
  109. return nil
  110. }
  111. // TODO 聚合算法实现
  112. func group(name string, cluster FilterCluster) FilterClusters {
  113. // 建议子级聚类数量 = 当前聚类中数据记录内,指定name字段的取值数
  114. // 简单近似处理,group by即可
  115. // 求最优解处理,建议多次迭代的kmeans
  116. return nil
  117. }
  118. // TODO 散列算法实现
  119. func hash(name string, cluster FilterCluster) FilterClusters {
  120. // 建议子级聚类数量 = 当前聚类中数据记录内,指定name字段的取值数
  121. // 简单近似处理,写个循环分布一下数据即可
  122. // 求最优解处理,建议多次迭代的kmeans
  123. return nil
  124. }