0

here is my hive table

create table if not exists dumdum (val map<string,map<string,struct<student_id:string,age:int>>>);
insert into dumdum select map('A',map('1',named_struct('student_id','123a', 'age',11)));
insert into dumdum select map('B',map('2',named_struct('student_id','987z', 'age',11)));
select * from dumdum;

and i see

{"A":{"1":{"student_id":"123a","age":11}}}
{"B":{"2":{"student_id":"987z","age":11}}}

I want to extract all the student_id from the inner map i.e. 123a and 987z. So here is what i want to do

select some_udf(val) from dumdum;

and the result should be

["123a","987z"]

Here is the Java UDF i wrote

import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;

import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
import org.apache.hadoop.hive.ql.exec.UDFArgumentLengthException;
import org.apache.hadoop.hive.ql.metadata.HiveException;
import org.apache.hadoop.hive.ql.udf.generic.GenericUDF;

import org.apache.hadoop.hive.serde2.objectinspector.*;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;

import org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;

import java.util.ArrayList;
import java.util.List;
import java.util.Map;

public class CustomUDF extends GenericUDF {
    private MapObjectInspector  inputMapOI                        = null;

    private Converter           inputMapKeyConverter              = null;

    private MapObjectInspector inputMapValueMapOI               = null;
    private Converter inputMapValueConverter;

    @Override
    public String getDisplayString(String[] arguments) {
        return "my udf";
    }

    @Override
    public ObjectInspector initialize(ObjectInspector[] arguments) throws UDFArgumentException {
        if ((null == arguments) || (arguments.length != 1)) {
            throw new UDFArgumentLengthException("1 arguments are expected.");
        }

        if (!(arguments[0] instanceof MapObjectInspector)) {
            throw new UDFArgumentException("The first parameter should be a map object ");
        }

        inputMapOI = (MapObjectInspector) arguments[0];

        ObjectInspector mapKeyOI = PrimitiveObjectInspectorFactory.getPrimitiveJavaObjectInspector(PrimitiveObjectInspector.PrimitiveCategory.STRING);
        inputMapKeyConverter = ObjectInspectorConverters.getConverter(this.inputMapOI.getMapKeyObjectInspector(), mapKeyOI);

        if (!(inputMapOI.getMapValueObjectInspector() instanceof MapObjectInspector)) {
            throw new UDFArgumentException("The map value type must be a map ");
        }
        inputMapValueMapOI = (MapObjectInspector) this.inputMapOI.getMapValueObjectInspector();

        List<String> structFieldNames = new ArrayList<String>();

        structFieldNames.add("student_id");
        structFieldNames.add("age");

        List<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>();
        structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);

        ObjectInspector inputMapElementOI = inputMapValueMapOI.getMapValueObjectInspector();
        ObjectInspector outputMapElementOI = ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames, structFieldObjectInspectors);

        inputMapValueConverter = ObjectInspectorConverters.getConverter(inputMapElementOI, outputMapElementOI);

        return ObjectInspectorFactory.getStandardListObjectInspector(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
    }

    @Override
    public Object evaluate(DeferredObject[] arguments) throws HiveException {
        if ((null == arguments) || (arguments.length != 1)) {
            throw new UDFArgumentLengthException("1 argument is expected.");
        }

        Map<?, ?> map = inputMapOI.getMap(arguments[0].get());
        List<String> dataList = new ArrayList<String>();
        for (Object key : map.keySet()) {
            Map<?, ?> valueMap = this.inputMapValueMapOI.getMap(map.get(key));
            if ((valueMap == null) || (valueMap.size() == 0)) {
                continue;
            }

            for (Object value : valueMap.keySet()) {
                try{
                    String innerkey = (String) this.inputMapKeyConverter.convert(value);
                    System.out.println("Got "+innerKey);
                    Map<?, ?> innerMap = (Map<?, ?>) this.inputMapValueMapOI.getMap(valueMap.get(key));
                    if ((innerMap == null) || (innerMap.size() == 0)) {
                        System.out.println("Got null");
                        continue;
                    }
                    for (Object struct : innerMap.keySet()) {
                    String strValue = (String) this.inputMapValueConverter.convert(struct);
                    
                    StructField str = (StructField) inputMapValueConverter.convert(innerMap.get(strValue));
                    /*
                    Not sure what to do here. Maybe 
                    str.getFieldID();
                    dataList.add(str.toString()); 
                    */
                    
                }
                }
                catch (ClassCastException c){
                    System.out.println("Got ClassCastException");
                }
            }
        }

        return dataList;
    }
}

when i invoke it as

add jar /path/to/my/jar;
CREATE TEMPORARY FUNCTION modudf AS 'some.package.CustomUDF';
select modudf(val) from dumdum;

i never get past

Map<?, ?> innerMap = (Map<?, ?>) this.inputMapValueMapOI.getMap(valueMap.get(inner));
                        if ((innerMap == null) || (innerMap.size() == 0)) {
                            System.out.println("Got null");
                            continue;
                        }

I can see the output of

System.out.println("Got "+innerKey);

on the console.

Why cant my converter access the inner map ?

Also, how will i dereference the StructField once i am able to access the inner map ?

Update

Thanks serge_k for the suggestion. I'm afraid i still need one converter else i wont be able to get the key. here is what i tried

First i defined the second map inspector and the struct inspector as

inputMapValueMapOI = (MapObjectInspector) this.inputMapOI.getMapValueObjectInspector();

        List<String> structFieldNames = new ArrayList<String>();

        structFieldNames.add("student_id");
        structFieldNames.add("age");

        List<ObjectInspector> structFieldObjectInspectors = new ArrayList<ObjectInspector>();
        structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaStringObjectInspector);
        structFieldObjectInspectors.add(PrimitiveObjectInspectorFactory.javaIntObjectInspector);

        structOI = ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames, structFieldObjectInspectors);

then here is what i tried

                    String innerkey = (String) inputMapKeyConverter.convert(value);
                    System.out.println(innerKey);
                    Map<?, ?> innerMap = (Map<?, ?>) this.inputMapValueMapOI.getMap(valueMap.get(innerkey));
                    if ((innerMap == null) || (innerMap.size() == 0)) {
                        System.out.println("null inner map");
                        continue;
                    }
                    
                    for (Object struct : innerMap.keySet()) {
                        String ikey = (String) inputMapKeyConverter.convert(struct);
                        Object obj = structOI.getStructFieldData(innerMap.get(ikey), structOI.getStructFieldRef("student_id"));
                        dataList.add(obj.toString());
                    }

but i still see

null inner map

have i not defined the inner map inspector properly ?

2
  • Try to convert map keys to standard keys (in the update) Commented Jul 1, 2020 at 20:23
  • Second thought, try to iterate through entrySet instead of keySet, looks like you doesn't need keys. Also check out the map size with inputMapOI.getMapSize(arguments[0].get()); Commented Jul 1, 2020 at 21:06

1 Answer 1

1

I would recommend you not to use converters, just define second MapObjectInspector for the inner map, get outer map value and call getMap like for the first map. To get the struct values you need to define a variable of StructObjectInspector type in initialize, e.g.

StructObjectInspector soi = ObjectInspectorFactory.getStandardStructObjectInspector(structFieldNames, structFieldObjectInspectors)

then

Object obj = soi.getStructFieldData(innerMapValue, soi.getStructFieldRef("student_id"))

Update: Try to convert map keys to standard keys as follows

private Map stdKeys(Map inspectMap) {
    Map objMap = new HashMap();
    for (Object inspKey : inspectMap.keySet()) {

        Object objKey = ((PrimitiveObjectInspector) mapInspector.getMapKeyObjectInspector()).getPrimitiveJavaObject(inspKey);
        objMap.put(objKey, inspKey);

    }
    return objMap;
}

See https://github.com/klout/brickhouse/blob/master/src/main/java/brickhouse/udf/collect/MapRemoveKeysUDF.java for more details

Sign up to request clarification or add additional context in comments.

1 Comment

thanks serge_k, please see my update. i still cant access the inner map. i need at least 1 converter to be able to get the keys as strings. however, i also created a second inspector for the inner map but still i am unable to extract the inner map

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.