0

I have the following mapping for an index:

{
  "test5" : {
    "mappings" : {
      "dynamic" : "false",
      "properties" : {
        "messageType" : {
          "type" : "keyword"
        },
        "groupId" : {
          "type" : "keyword"
        },
        "payload" : {
          "type" : "nested",
          "include_in_root" : true,
          "properties" : {
            "request" : {
              "type" : "nested",
              "include_in_root" : true,
              "properties" : {
                "data" : {
                  "type" : "nested",
                  "include_in_root" : true,
                  "properties" : {
                    "chargingPeriods" : {
                      "type" : "nested",
                      "include_in_root" : true,
                      "properties" : {
                        "endDateTime" : {
                          "type" : "date"
                        },
                        "power" : {
                          "type" : "double"
                        },
                        "startDateTime" : {
                          "type" : "date"
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}

First use case, I want buckets in 2 min intervals based on payload.request.data.chargingPeriods.startDateTime and groupId with a filter criteria of messageType . BTW chargingPeriods is an array.

This query works for that use case:

GET test5/_search
{
  "size": 0,
  "aggs": {
    "my_buckets": {
      "composite": {
        "sources": [
          { "sessionId": { "terms": { "field": "groupId"} } },
          {
            "date" : {
              "date_histogram": {
                "field": "payload.request.data.chargingPeriods.startDateTime",
                "fixed_interval": "2m",
                "format": "MM/dd/yyyy - hh:mm:ss",
                "order": "asc"
              }
            }
          }
        ]
      }    
    }
  },
  "query": {
    "terms": {
      "messageType": [
        "test"
      ]
    }
  }
}

Now I want metric aggregations done on these composite buckets returned and I tried this:

GET test5/_search
{
  "size": 0,
  "aggs": {
    "my_buckets": {
      "composite": {
        "sources": [
          { "sessionId": { "terms": { "field": "groupId"} } },
          {
            "date" : {
              "date_histogram": {
                "field": "payload.request.data.chargingPeriods.startDateTime",
                "fixed_interval": "2m",
                "format": "MM/dd/yyyy - hh:mm:ss",
                "order": "asc"
              }
            }
          }
        ]
      },
      "aggregations": {
        "metricAgg": {
          "max": {
            "field": "payload.request.data.chargingPeriods.power"
          }
        }
      }      
    }
  },
  "query": {
    "terms": {
      "messageType": [
        "test"
      ]
    }
  }
}

According to ES documentation https://www.elastic.co/guide/en/elasticsearch/reference/current/search-aggregations-bucket-composite-aggregation.html, this should work by doing metric aggregation on the composite bucket But instead of the metric aggregation being computed on the composite bucket, it is being computed across all the power fields in chargingPeriods array in the entire given document.

How I created the index:

PUT /test5
{
  "settings": {
    "number_of_shards": 1
  },
  "mappings" : {
      "dynamic" : "false",
      "properties" : {
        "groupId" : {
          "type" : "keyword"
        },
        "messageType" : {
          "type" : "keyword"
        },
        "payload" : {
          "type" : "nested",
          "include_in_root": true,
          "properties": {
            "request": {
              "type":"nested",
              "include_in_root":true,
              "properties": {
                "data": {
                  "type":"nested",
                  "include_in_root": true,
                  "properties": {
                    "chargingPeriods": {
                      "type": "nested",
                      "include_in_root": true,
                      "properties" : {
                        "endDateTime":{
                          "type": "date"
                        },
                        "power": {
                          "type": "double"
                        },
                        "startDateTime":{
                          "type": "date"
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
}

Test Data:

    POST test5/_doc/testdocu1
{
  "groupId": "563",
  "messageType": "test",
  "payload": {
    "request": {
      "data": {
        "chargingPeriods": [
          {
            "endDateTime": "2022-10-13T17:42:25Z",
            "power": 9.62857,
            "startDateTime": "2022-10-13T17:41:55Z"
          },
          {
            "endDateTime": "2022-10-13T17:42:55Z",
            "power": 9.6491,
            "startDateTime": "2022-10-13T17:42:25Z"
          },
          {
            "endDateTime": "2022-10-13T17:43:25Z",
            "power": 9.6491,
            "startDateTime": "2022-10-13T17:42:55Z"
          },
          {
            "endDateTime": "2022-10-13T17:43:55Z",
            "power": 9.66963,
            "startDateTime": "2022-10-13T17:43:25Z"
          },
          {
            "endDateTime": "2022-10-13T17:44:25Z",
            "power": 9.67128,
            "startDateTime": "2022-10-13T17:43:55Z"
          },
          {
            "endDateTime": "2022-10-13T17:44:55Z",
            "power": 9.65079,
            "startDateTime": "2022-10-13T17:44:25Z"
          },
          {
            "endDateTime": "2022-10-13T17:45:25Z",
            "power": 9.66492,
            "startDateTime": "2022-10-13T17:44:55Z"
          },
          {
            "endDateTime": "2022-10-13T17:45:55Z",
            "power": 9.68544,
            "startDateTime": "2022-10-13T17:45:25Z"
          },
          {
            "endDateTime": "2022-10-13T17:46:25Z",
            "power": 9.68544,
            "startDateTime": "2022-10-13T17:45:55Z"
          },
          {
            "endDateTime": "2022-10-13T17:46:55Z",
            "power": 9.67434,
            "startDateTime": "2022-10-13T17:46:25Z"
          }
        ]
      }
    }
  }
}

My output:

  "aggregations" : {
    "my_buckets" : {
      "after_key" : {
        "sessionId" : "563",
        "date" : "10/13/2022 - 05:46:00"
      },
      "buckets" : [
        {
          "key" : {
            "sessionId" : "563",
            "date" : "10/13/2022 - 05:40:00"
          },
          "doc_count" : 1,
          "metricAgg" : {
            "value" : 9.68544
          }
        },
        {
          "key" : {
            "sessionId" : "563",
            "date" : "10/13/2022 - 05:42:00"
          },
          "doc_count" : 4,
          "metricAgg" : {
            "value" : 9.68544
          }
        },
        {
          "key" : {
            "sessionId" : "563",
            "date" : "10/13/2022 - 05:44:00"
          },
          "doc_count" : 4,
          "metricAgg" : {
            "value" : 9.68544
          }
        },
        {
          "key" : {
            "sessionId" : "563",
            "date" : "10/13/2022 - 05:46:00"
          },
          "doc_count" : 1,
          "metricAgg" : {
            "value" : 9.68544
          }
        }
      ]
    }
  }

As you can see, it chose the max payload.request.data.chargingPeriods.power from all the elements, ignoring the composite buckets. For example

{
  "key" : {
    "sessionId" : "563",
    "date" : "10/13/2022 - 05:40:00"
  },
  "doc_count" : 1,
  "metricAgg" : {
    "value" : 9.68544
  }
},

metricAgg should have been 9.62857

1 Answer 1

1
+50

It doesn't work the way you expect because you're aggregating nested data which you have include_in_root, and hence, all the nested data finds itself in the root document as if it was not nested, and so, the relation between the startDateTime and the power is basically lost.

The other issue is that your composite aggregation aggregates nested (payload...) and non-nested data (groupId), that won't work.

However, if you add the groupId field inside each element of your array, then you can make your query work like this:

GET test5/_search
{
  "size": 0,
  "aggs": {
    "payload": {
      "nested": {
        "path": "payload"
      },
      "aggs": {
        "request": {
          "nested": {
            "path": "payload.request"
          },
          "aggs": {
            "data": {
              "nested": {
                "path": "payload.request.data"
              },
              "aggs": {
                "charging": {
                  "nested": {
                    "path": "payload.request.data.chargingPeriods"
                  },
                  "aggs": {
                    "my_buckets": {
                      "composite": {
                        "sources": [
                          {
                            "sessionId": {
                              "terms": {
                                "field": "payload.request.data.chargingPeriods.groupId"
                              }
                            }
                          },
                          {
                            "date": {
                              "date_histogram": {
                                "field": "payload.request.data.chargingPeriods.startDateTime",
                                "fixed_interval": "2m",
                                "format": "MM/dd/yyyy - hh:mm:ss",
                                "order": "asc"
                              }
                            }
                          }
                        ]
                      },
                      "aggregations": {
                        "metricAgg": {
                          "max": {
                            "field": "payload.request.data.chargingPeriods.power"
                          }
                        }
                      }
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  },
  "query": {
    "terms": {
      "messageType": [
        "test"
      ]
    }
  }
}

Results:

            {
              "key" : {
                "sessionId" : "563",
                "date" : "10/13/2022 - 05:40:00"
              },
              "doc_count" : 1,
              "metricAgg" : {
                "value" : 9.62857
              }
            },
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.