4

I am trying to write a custom SerDe to suit my needs, but I am stuck at a point where I get a ClassCastException.

Input data is:

john,miller

I want to load this data into a Hive table with the columns (fname string, lname string), so I wrote a custom SerDe for it.

I have only implemented the deserialize method of the SerDe interface, as follows:

    package com.datametica.serde;

import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.Properties;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.hive.serde.Constants;
import org.apache.hadoop.hive.serde2.SerDe;
import org.apache.hadoop.hive.serde2.SerDeException;
import org.apache.hadoop.hive.serde2.SerDeStats;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
import org.apache.hadoop.hive.serde2.objectinspector.StructObjectInspector;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfo;
import org.apache.hadoop.hive.serde2.typeinfo.TypeInfoUtils;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;

/**
 * Hive SerDe that reads comma-separated text records.
 *
 * <p>{@link #initialize} builds a standard struct object inspector from the
 * table's column names and types; {@link #deserialize} turns each record into
 * a {@link CustomDataFormat} bean. Serialization is not implemented.
 */
public class CustomSerde implements SerDe {

    /** Number of columns declared on the table. */
    int numColumns;
    /** Inspector handed back from {@link #getObjectInspector()}. */
    StructObjectInspector rowOI;
    /** Column names taken from the table properties. */
    List<String> columnNames;
    /** Declared for row storage; not assigned anywhere in this class. */
    List<Object> rows;
    /** Column types parsed from the table properties. */
    List<TypeInfo> columnTypes;

    /**
     * Reads the column names and column types from the table properties and
     * creates the struct object inspector for the row.
     *
     * @param conf     Hadoop configuration (not read here)
     * @param tblProps table properties supplying the column list and types
     * @throws SerDeException declared by the SerDe contract
     */
    @Override
    public void initialize(Configuration conf, Properties tblProps)
            throws SerDeException {
        final String namesCsv = tblProps.getProperty(Constants.LIST_COLUMNS);
        columnNames = Arrays.asList(namesCsv.split(","));

        final String typesSpec = tblProps
                .getProperty(Constants.LIST_COLUMN_TYPES);
        columnTypes = TypeInfoUtils.getTypeInfosFromTypeString(typesSpec);
        numColumns = columnNames.size();

        // One standard Java object inspector per declared column.
        final List<ObjectInspector> inspectors = new ArrayList<ObjectInspector>(
                columnNames.size());
        for (int idx = 0; idx < numColumns; idx++) {
            inspectors.add(TypeInfoUtils
                    .getStandardJavaObjectInspectorFromTypeInfo(columnTypes
                            .get(idx)));
        }

        /*
         * for (int c = 0; c < numColumns; c++) { rows.add(); }
         */

        rowOI = ObjectInspectorFactory.getStandardStructObjectInspector(
                columnNames, inspectors);
    }

    /**
     * Splits the record text on commas and copies the first two tokens into a
     * new {@link CustomDataFormat}. The raw line is also echoed to stdout.
     *
     * @param record the input record; cast to {@link Text}
     * @return a bean holding the first and last name
     * @throws SerDeException declared by the SerDe contract
     */
    @Override
    public CustomDataFormat deserialize(Writable record) throws SerDeException {
        final String line = ((Text) record).toString();
        final String[] tokens = line.split(",");

        final String separator = "----------------------------\n";
        System.out.println(separator);
        System.out.println("yo yo yo " + line + "\n");
        System.out.println(separator);

        final CustomDataFormat bean = new CustomDataFormat();
        bean.setFname(tokens[0]);
        bean.setLname(tokens[1]);
        return bean;
    }

    /** @return the struct inspector created in {@link #initialize} */
    @Override
    public ObjectInspector getObjectInspector() throws SerDeException {
        return rowOI;
    }

    /** Statistics are not collected; always returns {@code null}. */
    @Override
    public SerDeStats getSerDeStats() {
        return null;
    }

    /** Serialization is not implemented; always returns {@code null}. */
    @Override
    public Class<? extends Writable> getSerializedClass() {
        return null;
    }

    /** Serialization is not implemented; always returns {@code null}. */
    @Override
    public Writable serialize(Object arg0, ObjectInspector arg1)
            throws SerDeException {
        return null;
    }

}

Class which will hold the data

package com.datametica.serde;

import java.util.ArrayList;
import java.util.List;

/**
 * Simple mutable holder for one parsed record: a first name and a last name.
 */
public class CustomDataFormat {
    /** First-name value of the record. */
    String fname;

    /** Last-name value of the record. */
    String lname;

    /*List<LevelOneStruct> arrayOfLevelTwoStruct = new ArrayList<LevelOneStruct>();

    public List<LevelOneStruct> getArrayOfLevelTwoStruct() {
        return arrayOfLevelTwoStruct;
    }

    public void setArrayOfLevelTwoStruct(
            List<LevelOneStruct> arrayOfLevelTwoStruct) {
        this.arrayOfLevelTwoStruct = arrayOfLevelTwoStruct;
    }*/

    /** @return the first name, or {@code null} if none has been set */
    public String getFname() {
        return this.fname;
    }

    /** @return the last name, or {@code null} if none has been set */
    public String getLname() {
        return this.lname;
    }

    /** @param value the first name to store */
    public void setFname(String value) {
        this.fname = value;
    }

    /** @param value the last name to store */
    public void setLname(String value) {
        this.lname = value;
    }

}

ObjectInspector for CustomDataFormat class

package com.datametica.serde;

import java.util.ArrayList;
import java.util.List;

import org.apache.hadoop.hive.serde2.objectinspector.StandardStructObjectInspector;
import org.apache.hadoop.hive.serde2.objectinspector.StructField;

/**
 * Struct object inspector that reads its field values from a
 * {@link CustomDataFormat} row object.
 */
public class CustomStructObjectInspector extends StandardStructObjectInspector {

    /**
     * Returns the value of one field of the given row.
     *
     * @param data     the row, expected to be a {@link CustomDataFormat};
     *                 may be {@code null}
     * @param fieldRef the field to read; matched by name
     * @return the field value, or {@code null} when the row is {@code null}
     *         or the field name is neither {@code fname} nor {@code lname}
     */
    @Override
    public Object getStructFieldData(Object data, StructField fieldRef) {
        // A null row is passed through as null instead of throwing a
        // NullPointerException when the getters are called.
        if (data == null) {
            return null;
        }
        CustomDataFormat row = (CustomDataFormat) data;
        switch (fieldRef.getFieldName()) {
        case "fname":
            return row.getFname();
        /*
         * case "arrayOfLevelTwoStruct": dataToReturn =
         * customSerde.getArrayOfLevelTwoStruct(); break;
         */
        case "lname":
            return row.getLname();
        default:
            return null;
        }
    }

    /**
     * Returns all field values of the given row in column order
     * ({@code fname}, then {@code lname}).
     *
     * @param data the row, expected to be a {@link CustomDataFormat};
     *             may be {@code null}
     * @return a new two-element list, or {@code null} when the row is
     *         {@code null}
     */
    @Override
    public List<Object> getStructFieldsDataAsList(Object data) {
        if (data == null) {
            return null;
        }
        CustomDataFormat row = (CustomDataFormat) data;
        List<Object> values = new ArrayList<Object>(2);
        values.add(row.getFname());
        values.add(row.getLname());
        return values;
    }

}

After building the jar, I create the Hive table as follows:

create table customserde (fname string,lname string) row format serde 'com.datametica.serde.CustomSerde';

And loading the data into table as

load data inpath '/user/dm3/tables_data/customserde' into table customserde;

Everything works up to this point, but when I run a select on the table:

select * from customserde;

getting exception

Caused by: java.lang.ClassCastException: com.datametica.serde.CustomDataFormat cannot be cast to [Ljava.lang.Object;

Any help is appreciated; I am completely stuck at this point.

thanks in advance.

1 Answer 1

3

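I found my mistake: the deserialize() method must not return the CustomDataFormat object itself. Because the SerDe's ObjectInspector is a standard struct inspector, deserialize() has to return the row as a list (e.g. an ArrayList) of column values, as follows: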

/**
 * Converts one comma-separated text record into the row object expected by
 * a standard struct object inspector: a list with one entry per column.
 *
 * <p>The same {@code rows} list is reused for every record; it is allocated
 * on first use because set() needs the two slots to exist already.
 *
 * @param record the input record; cast to {@code Text}
 * @return the reused row list holding fname at index 0 and lname at index 1;
 *         a column missing from the input line is returned as {@code null}
 * @throws SerDeException declared by the SerDe contract
 */
public Object deserialize(Writable record) throws SerDeException {
        Text text = (Text) record;
        String[] valArray = text.toString().split(",");

        // rows.set(i, ...) fails on a null or too-short list, so make sure
        // both column slots exist before filling them.
        if (rows == null || rows.size() < 2) {
            rows = new ArrayList<Object>(2);
            rows.add(null);
            rows.add(null);
        }

        // Guard the indexes so a short line yields NULL columns instead of
        // an ArrayIndexOutOfBoundsException.
        rows.set(0, valArray.length > 0 ? valArray[0] : null);
        rows.set(1, valArray.length > 1 ? valArray[1] : null);
        return rows;
    }
Sign up to request clarification or add additional context in comments.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.