
I'm trying to run a MapReduce program in Hadoop. It takes a text file as input in which each line is a JSON document. I'm using json-simple to parse this data in my mapper, and the reducer does some other work. I have included the json-simple jar in the hadoop/lib folder. Here is the code:
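Each input line looks roughly like this (m0, m1 and m3 are the fields the mapper reads; the values here are just made-up examples):

{"m0": "IN", "m1": "Chennai", "m3": "mobile"}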

package org.myorg;

import java.io.IOException;
import java.util.*;
import org.json.simple.JSONArray;
import org.json.simple.JSONObject;
import org.json.simple.parser.JSONParser;
import org.json.simple.parser.ParseException;
import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.*;
import org.apache.hadoop.mapreduce.*;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.mapreduce.lib.output.TextOutputFormat;
import org.apache.hadoop.util.GenericOptionsParser;

public class ALoc
{
    public static class AMapper extends Mapper<LongWritable, Text, Text, Text>
    {
        private Text kword = new Text();
        private Text vword = new Text();
        JSONParser parser = new JSONParser();

        public void map(LongWritable key, Text value, Context context) throws IOException, InterruptedException{
            try {
                String line = value.toString();
                Object obj = parser.parse(line);

                JSONObject jsonObject = (JSONObject) obj;
                String val = (String)jsonObject.get("m1") + "," + (String)jsonObject.get("m3");
                kword.set((String)jsonObject.get("m0"));
                vword.set(val);
                context.write(kword, vword);
            } 
            catch (IOException e) {
                 e.printStackTrace();
            }
            catch (ParseException e) {
                e.printStackTrace();
            }              
        }
    }

    public static class CountryReducer
        extends Reducer<Text,Text,Text,Text>
    {
        private Text result = new Text();
        public void reduce(Text key, Iterable<Text> values,
        Context context
        ) throws IOException, InterruptedException
        {
            HashMap<Text, Integer> hm = new HashMap<Text, Integer>();

            for (Text val : values)
            {
                // Hadoop reuses the same Text instance for every value,
                // so copy it before using it as a map key
                Text v = new Text(val);
                if (hm.containsKey(v)) {
                    hm.put(v, hm.get(v) + 1);
                } else {
                    hm.put(v, 1);
                }
            }
            Set<Map.Entry<Text, Integer>> set = hm.entrySet();
            Iterator<Map.Entry<Text, Integer>> i = set.iterator();
            String agr = "";

            while (i.hasNext()) {
                Map.Entry<Text, Integer> me = i.next();
                agr += "|" + me.getKey() + me.getValue();
            }
            result.set(agr);
            context.write(key, result);
        }
    }

    public static void main(String[] args) throws Exception
    {
        Configuration conf = new Configuration();
        Job job = new Job(conf, "ALoc");
        job.setJarByClass(ALoc.class);
        job.setMapperClass(AMapper.class);
        job.setReducerClass(CountryReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        System.exit(job.waitForCompletion(true) ? 0 : 1);
    }
}

When I try to run the job, it gives the following error. I am running this on a single-node AWS micro instance, following this tutorial: http://www.michael-noll.com/tutorials/running-hadoop-on-ubuntu-linux-single-node-cluster/

    hadoop@domU-18-11-19-02-92-8E:/$ bin/hadoop jar ALoc.jar org.myorg.ALoc /user/hadoop/adata /user/hadoop/adata-op5 -D mapred.reduce.tasks=16
13/02/12 08:39:50 WARN mapred.JobClient: Use GenericOptionsParser for parsing the arguments. Applications should implement Tool for the same.
13/02/12 08:39:50 INFO input.FileInputFormat: Total input paths to process : 1
13/02/12 08:39:50 INFO util.NativeCodeLoader: Loaded the native-hadoop library
13/02/12 08:39:50 WARN snappy.LoadSnappy: Snappy native library not loaded
13/02/12 08:39:51 INFO mapred.JobClient: Running job: job_201302120714_0006
13/02/12 08:39:52 INFO mapred.JobClient:  map 0% reduce 0%
13/02/12 08:40:10 INFO mapred.JobClient: Task Id : attempt_201302120714_0006_m_000000_0, Status : FAILED
java.lang.RuntimeException: Error while running command to get file permissions : java.io.IOException: Cannot run program "/bin/ls": java.io.IOException: error=12, Cannot allocate memory
    at java.lang.ProcessBuilder.start(ProcessBuilder.java:475)
    at org.apache.hadoop.util.Shell.runCommand(Shell.java:200)
    at org.apache.hadoop.util.Shell.run(Shell.java:182)
    at org.apache.hadoop.util.Shell$ShellCommandExecutor.execute(Shell.java:375)
    at org.apache.hadoop.util.Shell.execCommand(Shell.java:461)
    at org.apache.hadoop.util.Shell.execCommand(Shell.java:444)
    at org.apache.hadoop.fs.FileUtil.execCommand(FileUtil.java:710)
    at org.apache.hadoop.fs.RawLocalFileSystem$RawLocalFileStatus.loadPermissionInfo(RawLocalFileSystem.java:443)
    at org.apache.hadoop.fs.RawLocalFileSystem$RawLocalFileStatus.getOwner(RawLocalFileSystem.java:426)
    at org.apache.hadoop.mapred.TaskLog.obtainLogDirOwner(TaskLog.java:267)
    at org.apache.hadoop.mapred.TaskLogsTruncater.truncateLogs(TaskLogsTruncater.java:124)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:260)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:416)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)
Caused by: java.io.IOException: java.io.IOException: error=12, Cannot allocate memory
    at java.lang.UNIXProcess.<init>(UNIXProcess.java:164)
    at java.lang.ProcessImpl.start(ProcessImpl.java:81)
    at java.lang.ProcessBuilder.start(ProcessBuilder.java:468)
    ... 15 more

    at org.apache.hadoop.fs.RawLocalFileSystem$RawLocalFileStatus.loadPermissionInfo(RawLocalFileSystem.java:468)
    at org.apache.hadoop.fs.RawLocalFileSystem$RawLocalFileStatus.getOwner(RawLocalFileSystem.java:426)
    at org.apache.hadoop.mapred.TaskLog.obtainLogDirOwner(TaskLog.java:267)
    at org.apache.hadoop.mapred.TaskLogsTruncater.truncateLogs(TaskLogsTruncater.java:124)
    at org.apache.hadoop.mapred.Child$4.run(Child.java:260)
    at java.security.AccessController.doPrivileged(Native Method)
    at javax.security.auth.Subject.doAs(Subject.java:416)
    at org.apache.hadoop.security.UserGroupInformation.doAs(UserGroupInformation.java:1121)
    at org.apache.hadoop.mapred.Child.main(Child.java:249)
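(As an aside, the JobClient warning about GenericOptionsParser appears because the driver builds the Job directly in main(), so the trailing -D mapred.reduce.tasks=16 is treated as an ordinary argument and ignored. A sketch of a Tool-based driver, in case that matters; the class name ALocDriver is just for illustration:

package org.myorg;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.mapreduce.lib.output.FileOutputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

public class ALocDriver extends Configured implements Tool {

    @Override
    public int run(String[] args) throws Exception {
        // getConf() already carries any -D options stripped off by ToolRunner
        Job job = new Job(getConf(), "ALoc");
        job.setJarByClass(ALoc.class);
        job.setMapperClass(ALoc.AMapper.class);
        job.setReducerClass(ALoc.CountryReducer.class);
        job.setOutputKeyClass(Text.class);
        job.setOutputValueClass(Text.class);
        job.setInputFormatClass(TextInputFormat.class);
        FileInputFormat.addInputPath(job, new Path(args[0]));
        FileOutputFormat.setOutputPath(job, new Path(args[1]));
        return job.waitForCompletion(true) ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        // Generic options (-D, -libjars, ...) must come before the paths:
        // bin/hadoop jar ALoc.jar org.myorg.ALocDriver -D mapred.reduce.tasks=16 <in> <out>
        System.exit(ToolRunner.run(new Configuration(), new ALocDriver(), args));
    }
}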

1 Answer


I guess you must be trying Hadoop on a micro instance, which has very little memory (~700 MB).

Try increasing the HADOOP_HEAPSIZE parameter (in hadoop/conf/hadoop-env.sh), as the basic reason is a shortage of the memory required to fork processes.
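For example, in hadoop/conf/hadoop-env.sh (the value shown is only illustrative; pick one that fits your instance):

# The maximum amount of heap to use, in MB. Default is 1000.
export HADOOP_HEAPSIZE=2000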
