29、ElasticSearch 7.3 实战:聚合实战之使用Java api实现电视案例

一、数据准备

创建索引及映射

建立价格、颜色、品牌、售卖日期字段

PUT /tvs
PUT /tvs/_mapping
{
  "properties": {
    "price": {
      "type": "long"
    },
    "color": {
      "type": "keyword"
    },
    "brand": {
      "type": "keyword"
    },
    "sold_date": {
      "type": "date"
    }
  }
}

插入数据

POST /tvs/_bulk
{"index":{}}
{"price":1000,"color":"红色","brand":"长虹","sold_date":"2019-10-28"}
{"index":{}}
{"price":2000,"color":"红色","brand":"长虹","sold_date":"2019-11-05"}
{"index":{}}
{"price":3000,"color":"绿色","brand":"小米","sold_date":"2019-05-18"}
{"index":{}}
{"price":1500,"color":"蓝色","brand":"TCL","sold_date":"2019-07-02"}
{"index":{}}
{"price":1200,"color":"绿色","brand":"TCL","sold_date":"2019-08-19"}
{"index":{}}
{"price":2000,"color":"红色","brand":"长虹","sold_date":"2019-11-05"}
{"index":{}}
{"price":8000,"color":"红色","brand":"三星","sold_date":"2020-01-01"}
{"index":{}}
{"price":2500,"color":"蓝色","brand":"小米","sold_date":"2020-02-12"}

二、 按照颜色分组,计算每个颜色卖出的个数

ES语句

GET /tvs/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "group_by_color": {
      "terms": {
        "field": "color"
      }
    }
  }
}

返回

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 8,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "group_by_color" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "红色",
          "doc_count" : 4
        },
        {
          "key" : "绿色",
          "doc_count" : 2
        },
        {
          "key" : "蓝色",
          "doc_count" : 2
        }
      ]
    }
  }
}

Java代码

//按照颜色分组,计算每个颜色卖出的个数
    @Test
    public void testAggs() throws IOException {
        //1 构建请求
        SearchRequest searchRequest=new SearchRequest("tvs");
        //请求体
        SearchSourceBuilder searchSourceBuilder=new SearchSourceBuilder();
        searchSourceBuilder.size(0);
        searchSourceBuilder.query(QueryBuilders.matchAllQuery());
        TermsAggregationBuilder termsAggregationBuilder = AggregationBuilders.terms("group_by_color").field("color");
        searchSourceBuilder.aggregation(termsAggregationBuilder);
        //请求体放入请求头
        searchRequest.source(searchSourceBuilder);
        //2 执行
        SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
        Aggregations aggregations = searchResponse.getAggregations();
        Terms group_by_color = aggregations.get("group_by_color");
        List<? extends Terms.Bucket> buckets = group_by_color.getBuckets();
        for (Terms.Bucket bucket : buckets) {
            String key = bucket.getKeyAsString();
            System.out.println("key:"+key);
            long docCount = bucket.getDocCount();
            System.out.println("docCount:"+docCount);
            System.out.println("=================================");
        }
    }

结果

 

三、按照颜色分组,计算每个颜色卖出的个数,每个颜色卖出的平均价格

ES语句

GET /tvs/_search
{
  "size": 0,
  "query": {
    "match_all": {}
  },
  "aggs": {
    "group_by_color": {
      "terms": {
        "field": "color"
      },
      "aggs": {
        "avg_price": {
          "avg": {
            "field": "price"
          }
        }
      }
    }
  }
}

返回结果

{
  "took" : 2,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 8,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "group_by_color" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "红色",
          "doc_count" : 4,
          "avg_price" : {
            "value" : 3250.0
          }
        },
        {
          "key" : "绿色",
          "doc_count" : 2,
          "avg_price" : {
            "value" : 2100.0
          }
        },
        {
          "key" : "蓝色",
          "doc_count" : 2,
          "avg_price" : {
            "value" : 2000.0
          }
        }
      ]
    }
  }
}

Java代码

// 按照颜色分组,计算每个颜色卖出的个数,每个颜色卖出的平均价格
    @Test
    public void testAggsAndAvg() throws IOException {
        //1 构建请求
        SearchRequest searchRequest=new SearchRequest("tvs");
        //请求体
        SearchSourceBuilder searchSourceBuilder=new SearchSourceBuilder();
        searchSourceBuilder.size(0);
        searchSourceBuilder.query(QueryBuilders.matchAllQuery());
        TermsAggregationBuilder termsAggregationBuilder = AggregationBuilders.terms("group_by_color").field("color");
        //terms聚合下填充一个子聚合
        AvgAggregationBuilder avgAggregationBuilder = AggregationBuilders.avg("avg_price").field("price");
        termsAggregationBuilder.subAggregation(avgAggregationBuilder);
        searchSourceBuilder.aggregation(termsAggregationBuilder);
        //请求体放入请求头
        searchRequest.source(searchSourceBuilder);
        //2 执行
        SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
        Aggregations aggregations = searchResponse.getAggregations();
        Terms group_by_color = aggregations.get("group_by_color");
        List<? extends Terms.Bucket> buckets = group_by_color.getBuckets();
        for (Terms.Bucket bucket : buckets) {
            String key = bucket.getKeyAsString();
            System.out.println("key:"+key);
            long docCount = bucket.getDocCount();
            System.out.println("docCount:"+docCount);
            Aggregations aggregations1 = bucket.getAggregations();
            Avg avg_price = aggregations1.get("avg_price");
            double value = avg_price.getValue();
            System.out.println("value:"+value);
            System.out.println("=================================");
        }
    }

返回结果

 

四、按照颜色分组,计算每个颜色卖出的个数,以及每个颜色卖出的平均值、最大值、最小值、总和

ES语句

GET /tvs/_search
{
  "size": 0,
  "aggs": {
    "group_by_color": {
      "terms": {
        "field": "color"
      },
      "aggs": {
        "avg_price": {
          "avg": {
            "field": "price"
          }
        },
        "min_price": {
          "min": {
            "field": "price"
          }
        },
        "max_price": {
          "max": {
            "field": "price"
          }
        },
        "sum_price": {
          "sum": {
            "field": "price"
          }
        }
      }
    }
  }
}

返回结果

{
  "took" : 1,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 8,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "group_by_color" : {
      "doc_count_error_upper_bound" : 0,
      "sum_other_doc_count" : 0,
      "buckets" : [
        {
          "key" : "红色",
          "doc_count" : 4,
          "max_price" : {
            "value" : 8000.0
          },
          "min_price" : {
            "value" : 1000.0
          },
          "avg_price" : {
            "value" : 3250.0
          },
          "sum_price" : {
            "value" : 13000.0
          }
        },
        {
          "key" : "绿色",
          "doc_count" : 2,
          "max_price" : {
            "value" : 3000.0
          },
          "min_price" : {
            "value" : 1200.0
          },
          "avg_price" : {
            "value" : 2100.0
          },
          "sum_price" : {
            "value" : 4200.0
          }
        },
        {
          "key" : "蓝色",
          "doc_count" : 2,
          "max_price" : {
            "value" : 2500.0
          },
          "min_price" : {
            "value" : 1500.0
          },
          "avg_price" : {
            "value" : 2000.0
          },
          "sum_price" : {
            "value" : 4000.0
          }
        }
      ]
    }
  }
}

Java代码

    // 按照颜色分组,计算每个颜色卖出的个数,以及每个颜色卖出的平均值、最大值、最小值、总和。
    @Test
    public void testAggsAndMore() throws IOException {
        //1 构建请求
        SearchRequest searchRequest=new SearchRequest("tvs");
        //请求体
        SearchSourceBuilder searchSourceBuilder=new SearchSourceBuilder();
        searchSourceBuilder.size(0);
        searchSourceBuilder.query(QueryBuilders.matchAllQuery());
        TermsAggregationBuilder termsAggregationBuilder = AggregationBuilders.terms("group_by_color").field("color");
        //termsAggregationBuilder里放入多个子聚合
        AvgAggregationBuilder avgAggregationBuilder = AggregationBuilders.avg("avg_price").field("price");
        MinAggregationBuilder minAggregationBuilder = AggregationBuilders.min("min_price").field("price");
        MaxAggregationBuilder maxAggregationBuilder = AggregationBuilders.max("max_price").field("price");
        SumAggregationBuilder sumAggregationBuilder = AggregationBuilders.sum("sum_price").field("price");

        termsAggregationBuilder.subAggregation(avgAggregationBuilder);
        termsAggregationBuilder.subAggregation(minAggregationBuilder);
        termsAggregationBuilder.subAggregation(maxAggregationBuilder);
        termsAggregationBuilder.subAggregation(sumAggregationBuilder);
        searchSourceBuilder.aggregation(termsAggregationBuilder);
        //请求体放入请求头
        searchRequest.source(searchSourceBuilder);
        //2 执行
        SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
        Aggregations aggregations = searchResponse.getAggregations();
        Terms group_by_color = aggregations.get("group_by_color");
        List<? extends Terms.Bucket> buckets = group_by_color.getBuckets();
        for (Terms.Bucket bucket : buckets) {
            String key = bucket.getKeyAsString();
            System.out.println("key:"+key);

            long docCount = bucket.getDocCount();
            System.out.println("docCount:"+docCount);

            Aggregations aggregations1 = bucket.getAggregations();

            Max max_price = aggregations1.get("max_price");
            double maxPriceValue = max_price.getValue();
            System.out.println("maxPriceValue:"+maxPriceValue);

            Min min_price = aggregations1.get("min_price");
            double minPriceValue = min_price.getValue();
            System.out.println("minPriceValue:"+minPriceValue);

            Avg avg_price = aggregations1.get("avg_price");
            double avgPriceValue = avg_price.getValue();
            System.out.println("avgPriceValue:"+avgPriceValue);

            Sum sum_price = aggregations1.get("sum_price");
            double sumPriceValue = sum_price.getValue();
            System.out.println("sumPriceValue:"+sumPriceValue);

            System.out.println("=================================");
        }
    }

返回结果

 

五、按照售价每2000价格划分范围,算出每个区间的销售总额

ES语句

GET /tvs/_search
{
  "size": 0,
  "aggs": {
    "by_histogram": {
      "histogram": {
        "field": "price",
        "interval": 2000
      },
      "aggs": {
        "income": {
          "sum": {
            "field": "price"
          }
        }
      }
    }
  }
}

返回结果

查看代码


{
  "took" : 0,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 8,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "by_histogram" : {
      "buckets" : [
        {
          "key" : 0.0,
          "doc_count" : 3,
          "income" : {
            "value" : 3700.0
          }
        },
        {
          "key" : 2000.0,
          "doc_count" : 4,
          "income" : {
            "value" : 9500.0
          }
        },
        {
          "key" : 4000.0,
          "doc_count" : 0,
          "income" : {
            "value" : 0.0
          }
        },
        {
          "key" : 6000.0,
          "doc_count" : 0,
          "income" : {
            "value" : 0.0
          }
        },
        {
          "key" : 8000.0,
          "doc_count" : 1,
          "income" : {
            "value" : 8000.0
          }
        }
      ]
    }
  }
}

Java代码

    // 按照售价每2000价格划分范围,算出每个区间的销售总额 histogram
    @Test
    public void testAggsAndHistogram() throws IOException {
        //1 构建请求
        SearchRequest searchRequest=new SearchRequest("tvs");
        //请求体
        SearchSourceBuilder searchSourceBuilder=new SearchSourceBuilder();
        searchSourceBuilder.size(0);
        searchSourceBuilder.query(QueryBuilders.matchAllQuery());
        HistogramAggregationBuilder histogramAggregationBuilder = 
                AggregationBuilders.histogram("by_histogram").field("price").interval(2000);
        SumAggregationBuilder sumAggregationBuilder = AggregationBuilders.sum("income").field("price");
        histogramAggregationBuilder.subAggregation(sumAggregationBuilder);
        searchSourceBuilder.aggregation(histogramAggregationBuilder);
        //请求体放入请求头
        searchRequest.source(searchSourceBuilder);
        //2 执行
        SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
        Aggregations aggregations = searchResponse.getAggregations();
        Histogram group_by_color = aggregations.get("by_histogram");
        List<? extends Histogram.Bucket> buckets = group_by_color.getBuckets();
        for (Histogram.Bucket bucket : buckets) {
            String keyAsString = bucket.getKeyAsString();
            System.out.println("keyAsString:"+keyAsString);
            long docCount = bucket.getDocCount();
            System.out.println("docCount:"+docCount);

            Aggregations aggregations1 = bucket.getAggregations();
            Sum income = aggregations1.get("income");
            double value = income.getValue();
            System.out.println("value:"+value);

            System.out.println("=================================");
        }
    }

返回结果

 

六、计算每个季度的销售总额

ES语句

GET /tvs/_search
{
  "size": 0,
  "aggs": {
    "sales": {
      "date_histogram": {
        "field": "sold_date",
        "interval": "quarter",
        "format": "yyyy-MM-dd",
        "min_doc_count": 0,
        "extended_bounds": {
          "min": "2019-01-01",
          "max": "2020-12-31"
        }
      },
      "aggs": {
        "income": {
          "sum": {
            "field": "price"
          }
        }
      }
    }
  }
}

返回结果

查看代码


#! Deprecation: [interval] on [date_histogram] is deprecated, use [fixed_interval] or [calendar_interval] in the future.
{
  "took" : 6,
  "timed_out" : false,
  "_shards" : {
    "total" : 1,
    "successful" : 1,
    "skipped" : 0,
    "failed" : 0
  },
  "hits" : {
    "total" : {
      "value" : 8,
      "relation" : "eq"
    },
    "max_score" : null,
    "hits" : [ ]
  },
  "aggregations" : {
    "sales" : {
      "buckets" : [
        {
          "key_as_string" : "2019-01-01",
          "key" : 1546300800000,
          "doc_count" : 0,
          "income" : {
            "value" : 0.0
          }
        },
        {
          "key_as_string" : "2019-04-01",
          "key" : 1554076800000,
          "doc_count" : 1,
          "income" : {
            "value" : 3000.0
          }
        },
        {
          "key_as_string" : "2019-07-01",
          "key" : 1561939200000,
          "doc_count" : 2,
          "income" : {
            "value" : 2700.0
          }
        },
        {
          "key_as_string" : "2019-10-01",
          "key" : 1569888000000,
          "doc_count" : 3,
          "income" : {
            "value" : 5000.0
          }
        },
        {
          "key_as_string" : "2020-01-01",
          "key" : 1577836800000,
          "doc_count" : 2,
          "income" : {
            "value" : 10500.0
          }
        },
        {
          "key_as_string" : "2020-04-01",
          "key" : 1585699200000,
          "doc_count" : 0,
          "income" : {
            "value" : 0.0
          }
        },
        {
          "key_as_string" : "2020-07-01",
          "key" : 1593561600000,
          "doc_count" : 0,
          "income" : {
            "value" : 0.0
          }
        },
        {
          "key_as_string" : "2020-10-01",
          "key" : 1601510400000,
          "doc_count" : 0,
          "income" : {
            "value" : 0.0
          }
        }
      ]
    }
  }
}

Java代码

    // 计算每个季度的销售总额
    @Test
    public void testAggsAndDateHistogram() throws IOException {
        //1 构建请求
        SearchRequest searchRequest=new SearchRequest("tvs");
        //请求体
        SearchSourceBuilder searchSourceBuilder=new SearchSourceBuilder();
        searchSourceBuilder.size(0);
        searchSourceBuilder.query(QueryBuilders.matchAllQuery());
        DateHistogramAggregationBuilder dateHistogramAggregationBuilder = 
                AggregationBuilders.dateHistogram("date_histogram")
                .field("sold_date")
                .calendarInterval(DateHistogramInterval.QUARTER)
                .format("yyyy-MM-dd")
                .minDocCount(0)
                .extendedBounds(new ExtendedBounds("2019-01-01", "2020-12-31"));
        SumAggregationBuilder sumAggregationBuilder = 
                AggregationBuilders.sum("income").field("price");
        dateHistogramAggregationBuilder.subAggregation(sumAggregationBuilder);
        searchSourceBuilder.aggregation(dateHistogramAggregationBuilder);
        //请求体放入请求头
        searchRequest.source(searchSourceBuilder);
        //2 执行
        SearchResponse searchResponse = client.search(searchRequest, RequestOptions.DEFAULT);
        Aggregations aggregations = searchResponse.getAggregations();
        ParsedDateHistogram date_histogram = aggregations.get("date_histogram");
        List<? extends Histogram.Bucket> buckets = date_histogram.getBuckets();
        for (Histogram.Bucket bucket : buckets) {
            String keyAsString = bucket.getKeyAsString();
            System.out.println("keyAsString:"+keyAsString);
            long docCount = bucket.getDocCount();
            System.out.println("docCount:"+docCount);

            Aggregations aggregations1 = bucket.getAggregations();
            Sum income = aggregations1.get("income");
            double value = income.getValue();
            System.out.println("value:"+value);
            System.out.println("====================");
        }
    }

返回结果