1

I have index of stores at various location. With each store I have a nested list of discount coupon.

Now I have query to get list of all unique coupons in a x km of radius sorted by the distance of the nearest applicable coupon on given location

Database :: Elasticsearch

Index Mapping ::

{
"mappings": {
    "car_stores": {
        "properties": {
            "location": {
                "type": "geo_point"
            },
            "discount_coupons": {
                "type": "nested",
                "properties": {
                    "name": {
                        "type": "keyword"
                    }
                }
            }
        }
    }
}
}

Sample Doc ::

{
"_index": "stores",
"_type": "car_stores",
"_id": "1258c81d-b6f2-400f-a448-bd728f524b55",
"_score": 1.0,
"_source": {
    "location": {
        "lat": 36.053757,
        "lon": 139.525482
    },
    "discount_coupons": [
        {
            "name": "c1"
        },
        {
            "name": "c2"
        }
    ]
}
}

Old Query to get unique discount coupon names in x km area for given location ::

{
"size": 0,
"query": {
    "bool": {
        "must": {
            "match_all": {}
        },
        "filter": {
            "geo_distance": {
                "distance": "100km",
                "location": {
                    "lat": 40,
                    "lon": -70
                }
            }
        }
    }
},
"aggs": {
    "coupon": {
        "nested": {
            "path": "discount_coupons"
        },
        "aggs": {
            "name": {
                "terms": {
                    "field": "discount_coupons.name",
                    "order": {
                        "_key": "asc"
                    },
                    "size": 200
                }
            }
        }
    }
}
}

Updated Response ::

{
"took": 60,
"timed_out": false,
"_shards": {
    "total": 3,
    "successful": 3,
    "skipped": 0,
    "failed": 0
},
"hits": {
    "total": 245328,
    "max_score": 0.0,
    "hits": []
},
"aggregations": {
    "coupon": {
        "doc_count": 657442,
        "name": {
            "doc_count_error_upper_bound": -1,
            "sum_other_doc_count": 641189,
            "buckets": [
                {
                    "key": "local20210211",
                    "doc_count": 1611,
                    "back_to_base": {
                        "doc_count": 1611,
                        "distance_script": {
                            "value": 160.61034409639765
                        }
                    }
                },
                {
                    "key": "local20210117",
                    "doc_count": 1621,
                    "back_to_base": {
                        "doc_count": 1621,
                        "distance_script": {
                            "value": 77.51459886447356
                        }
                    }
                },
                {
                    "key": "local20201220",
                    "doc_count": 1622,
                    "back_to_base": {
                        "doc_count": 1622,
                        "distance_script": {
                            "value": 84.15734462544432
                        }
                    }
                },
                {
                    "key": "kisekae1",
                    "doc_count": 1626,
                    "back_to_base": {
                        "doc_count": 1626,
                        "distance_script": {
                            "value": 88.23770888201268
                        }
                    }
                },
                {
                    "key": "local20210206",
                    "doc_count": 1626,
                    "back_to_base": {
                        "doc_count": 1626,
                        "distance_script": {
                            "value": 86.78376012847237
                        }
                    }
                },
                {
                    "key": "local20210106",
                    "doc_count": 1628,
                    "back_to_base": {
                        "doc_count": 1628,
                        "distance_script": {
                            "value": 384.12156408078397
                        }
                    }
                },
                {
                    "key": "local20210113",
                    "doc_count": 1628,
                    "back_to_base": {
                        "doc_count": 1628,
                        "distance_script": {
                            "value": 153.61681676703674
                        }
                    }
                },
                {
                    "key": "local20",
                    "doc_count": 1629,
                    "back_to_base": {
                        "doc_count": 1629,
                        "distance_script": {
                            "value": 168.74132991524073
                        }
                    }
                },
                {
                    "key": "local20210213",
                    "doc_count": 1630,
                    "back_to_base": {
                        "doc_count": 1630,
                        "distance_script": {
                            "value": 155.8335679860034
                        }
                    }
                },
                {
                    "key": "local20210208",
                    "doc_count": 1632,
                    "back_to_base": {
                        "doc_count": 1632,
                        "distance_script": {
                            "value": 99.58790590445102
                        }
                    }
                }
            ]
        }
    }
}
}

Now the above query will return first 200 discount coupon default sorted by count but I want to return coupons sorted on distance based to given location i.e. the coupon that is nearest applicable should come first.

Is there any way to sort nested aggregations based on a parent key or can I solve this use case using a different data model?

Update Query ::

{
"size": 0,
"query": {
    "bool": {
        "filter": [
            {
                "geo_distance": {
                    "distance": "100km",
                    "location": {
                        "lat": 35.699104,
                        "lon": 139.825211
                    }
                }
            },
            {
                "nested": {
                    "path": "discount_coupons",
                    "query": {
                        "bool": {
                            "filter": {
                                "exists": {
                                    "field": "discount_coupons"
                                }
                            }
                        }
                    }
                }
            }
        ]
    }
},
"aggs": {
    "coupon": {
        "nested": {
            "path": "discount_coupons"
        },
        "aggs": {
            "name": {
                "terms": {
                    "field": "discount_coupons.name",
                    "order": {
                        "back_to_base": "asc"
                    },
                    "size": 10
                },
                "aggs": {
                    "back_to_base": {
                        "reverse_nested": {},
                        "aggs": {
                            "distance_script": {
                                "min": {
                                    "script": {
                                        "source": "doc['location'].arcDistance(35.699104, 139.825211)"
                                    }
                                }
                            }
                        }
                    }
                }
            }
        }
    }
}
}

1 Answer 1

2

Interesting question. You can always order a terms aggregation by the result of a numeric sub-aggregation. The trick here is to escape the nested context via a reverse_nested aggregation and then calculate the distance from the pivot using a script:

{
  "size": 0,
  "query": {
    "bool": {
      "must": {
        "match_all": {}
      },
      "filter": {
        "geo_distance": {
          "distance": "100km",
          "location": {
            "lat": 40,
            "lon": -70
          }
        }
      }
    }
  },
  "aggs": {
    "coupon": {
      "nested": {
        "path": "discount_coupons"
      },
      "aggs": {
        "name": {
          "terms": {
            "field": "discount_coupons.name",
            "order": {
              "back_to_base": "asc"
            },
            "size": 200
          },
          "aggs": {
            "back_to_base": {
              "reverse_nested": {},
              "aggs": {
                "distance_script": {
                  "min": {
                    "script": {
                      "source": "doc['location'].arcDistance(40, -70)"
                    }
                  }
                }
              }
            }
          }
        }
      }
    }
  }
}
Sign up to request clarification or add additional context in comments.

10 Comments

But I have read script are not recommended for realtime use case . If I say I have a traffic of 500 rps script will surely give high latency Also if I remember correctly script in es have a cap on running a defined number of times in an interval
You're right -- scripts should be avoided, if possible. On the other hand, I don't think there's any simpler numeric aggregation that can calculate the geo distance per-bucket and on-the-fly. You can try this agg in a sandbox and go from there.
Thanks for the help Is sandbox another tool or db can you me proper link or full name of tool
With "sandbox" I meant a sandboxed execution environment where you're in control. So first run it on a highly filtered subsection of all docs, then lower the filter, then try it under a small qps load, then a heavier etc. Not all scripts are created equal and this one might work just fine ;)
It worked only had to pass "back_to_base>distance_script" in order field it was picking doc count as default
|

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.