2
// File names of the images that MapClass.map iterates over (img05 is listed last).
private static String[] testFiles = new String[]     {"img01.JPG","img02.JPG","img03.JPG","img04.JPG","img06.JPG","img07.JPG","img05.JPG"};
 // Earlier local-disk location, kept for reference:
 // private static String testFilespath = "/home/student/Desktop/images";
// Directory prefix that the mapper joins with "/" and each entry of testFiles to open an image.
private static String testFilespath ="hdfs://localhost:54310/user/root/images";
// Earlier index location, kept for reference:
//private static String indexpath = "/home/student/Desktop/indexDemo";
// Local image directory; not referenced anywhere in the visible part of this file.
private static  String testExtensive="/home/student/Desktop/images";

/**
 * Mapper that extracts an auto color correlogram feature string for every
 * image named in {@code testFiles} and emits (file name, feature string).
 *
 * <p>The incoming key and value are not used: each call to {@code map}
 * processes the whole {@code testFiles} list.
 *
 * <p>NOTE(review): images are opened with {@link FileInputStream}, which reads
 * from the local file system only, while {@code testFilespath} is currently an
 * {@code hdfs://} URI. Reading those files requires the Hadoop FileSystem API
 * (or an input format that hands the image bytes to the mapper).
 */
public static class MapClass extends MapReduceBase
    implements Mapper<Text, Text, Text, Text> {

  /** Maximum distance passed to the correlogram extractor. */
  private static final int MAXIMUM_DISTANCE = 16;

  // Reused output holders, overwritten for every emitted pair.
  private Text input_image = new Text();
  private Text input_vector = new Text();

  /**
   * Emits one (file name, feature string) pair per entry of {@code testFiles}.
   *
   * @param key      unused
   * @param value    unused
   * @param output   collector receiving the (file name, feature string) pairs
   * @param reporter unused
   * @throws IOException if an image cannot be opened, read, or decoded
   */
  @Override
  public void map(Text key, Text value, OutputCollector<Text, Text> output,
                  Reporter reporter) throws IOException {
    System.out.println("CorrelogramIndex Method:");
    AutoColorCorrelogram.Mode mode = AutoColorCorrelogram.Mode.FullNeighbourhood;

    for (String identifier : testFiles) {
      String imagePath = testFilespath + "/" + identifier;

      // The stream is only needed while decoding, so it is closed before extraction.
      BufferedImage bimg;
      try (FileInputStream fis = new FileInputStream(imagePath)) {
        bimg = ImageIO.read(fis);
      }
      // ImageIO.read returns null when no registered reader can decode the
      // stream; fail with a descriptive error instead of passing null on.
      if (bimg == null) {
        throw new IOException("Unsupported or corrupt image: " + imagePath);
      }

      AutoColorCorrelogram vd = new AutoColorCorrelogram(MAXIMUM_DISTANCE, mode);
      vd.extract(bimg);
      String featureString = vd.getStringRepresentation();
      System.out.println("image: " + identifier + " " + featureString);
      System.out.println(" ------------- ");

      input_image.set(identifier);
      input_vector.set(featureString);
      output.collect(input_image, input_vector);
    }
  }
}

  /**
   * Reducer that joins all feature strings received for one image key into a
   * single value, in iteration order and without a separator.
   */
  public static class Reduce extends MapReduceBase
      implements Reducer<Text, Text, Text, Text> {

    /**
     * Concatenates every value for {@code key} and emits the result once.
     *
     * @param key      image file name
     * @param values   feature strings collected for that key
     * @param output   collector receiving the (key, joined string) pair
     * @param reporter unused
     * @throws IOException if the collector fails to write the pair
     */
    @Override
    public void reduce(Text key, Iterator<Text> values,
                       OutputCollector<Text, Text> output,
                       Reporter reporter) throws IOException {
      // String.concat returns a new string and leaves the receiver unchanged,
      // so the pieces are accumulated in a builder instead.
      StringBuilder joined = new StringBuilder();
      while (values.hasNext()) {
        joined.append(values.next().toString());
      }
      output.collect(key, new Text(joined.toString()));
    }
  }

/**
 * Writes the command-line synopsis followed by the generic Hadoop options
 * to standard output.
 *
 * @return always -1, so callers can return it directly as an error code
 */
static int printUsage() {
  final String synopsis = "image_mapreduce [-m <maps>] [-r <reduces>] <input> <output>";
  System.out.println(synopsis);
  ToolRunner.printGenericCommandUsage(System.out);
  return -1;
}


/**
 * Configures and runs the image feature extraction job.
 *
 * <p>Accepted arguments: {@code [-m <maps>] [-r <reduces>] <input> <output>}.
 *
 * @param args command-line arguments
 * @return 0 once {@code JobClient.runJob} has returned normally; -1 when the
 *         arguments are malformed or the input/output paths are missing
 * @throws Exception if the job cannot be submitted or fails while running
 */
@Override
public int run(String[] args) throws Exception {
  JobConf conf = new JobConf(getConf(), image_mapreduce.class);
  conf.setJobName("image_mapreduce");

  // Output pairs are (image file name, feature string), both as Text.
  conf.setOutputKeyClass(Text.class);
  conf.setOutputValueClass(Text.class);

  conf.setMapperClass(MapClass.class);
  conf.setReducerClass(Reduce.class);
  // NOTE(review): no input format is set here, while MapClass declares Text
  // input keys. If the job's default input format produces another key type,
  // the map call will fail at runtime - confirm and set one explicitly.

  List<String> positional = new ArrayList<String>();
  for (int i = 0; i < args.length; ++i) {
    try {
      if ("-m".equals(args[i])) {
        conf.setNumMapTasks(Integer.parseInt(args[++i]));
      } else if ("-r".equals(args[i])) {
        conf.setNumReduceTasks(Integer.parseInt(args[++i]));
      } else {
        positional.add(args[i]);
      }
    } catch (NumberFormatException except) {
      System.out.println("ERROR: Integer expected instead of " + args[i]);
      return printUsage();
    } catch (ArrayIndexOutOfBoundsException except) {
      // i was already advanced past the flag, so the flag itself is at i - 1.
      System.out.println("ERROR: Required parameter missing from " +
                         args[i - 1]);
      return printUsage();
    }
  }

  // Both <input> and <output> are required; without this check the lookups
  // below would throw IndexOutOfBoundsException.
  if (positional.size() < 2) {
    System.out.println("ERROR: Wrong number of parameters: "
                       + positional.size() + " instead of 2.");
    return printUsage();
  }

  FileInputFormat.setInputPaths(conf, positional.get(0));
  FileOutputFormat.setOutputPath(conf, new Path(positional.get(1)));

  JobClient.runJob(conf);
  return 0;
}


 /**
  * Program entry point: runs this tool through ToolRunner with a fresh
  * Configuration and terminates the JVM with the tool's return code.
  */
 public static void main(String[] args) throws Exception {
   System.exit(ToolRunner.run(new Configuration(), new image_mapreduce(), args));
 }

}

I am writing a program that takes multiple image files stored in HDFS as input and extracts their features in the map function. How can I specify the path to read an image in FileInputStream(some parameters)? Or is there another way to read multiple image files?

What I want to do is: -- take multiple image files in HDFS as input; -- extract features in the map function; -- reduce iteratively. Please help me with the code or suggest better ways to do it.

2 Answers 2

1

Look into using the HIPI library - it stores a collection of images in an ImageBundle (which is more efficient than storing the individual image files in HDFS). They have a couple of examples too.

As for your code, you need to specify what input and output formats you plan to use. There is no current input format that hands the entire file over, but you can just extend FileInputFormat and create a RecordReader that emits <Text, BytesWritable> pairs, where the key is the filename, and the value is the bytes of the image file.

In fact Hadoop - The Definitive Guide has an example of this exact input format:

Sign up to request clarification or add additional context in comments.

5 Comments

@Chris... thanks for reply.. but I am currently using LIRe,lucene APIs only...Can you please just tell, is the code I have written correct?
@Chris...thanks again.. hey but i am not sure how to specify the path? Is the path specified in above code correct? It would be helpful if you edit the above code & convert it into required so that I can catch up with it & proceed further.. thanks..
Your current code is quite far from the solution (and besides, you're better off experimenting so you learn from the experience), but to specify the input files, using FileInputFormat.setInputPaths(conf, "hdfs://localhost:54310/user/root/images/*.JPG") should work.
hey...I think thats very helpful... R u suggesting that I should process each file as separate record whose input format will be (Nullwritable,Bytewritable) which is WholeFileInputFormat using RecordReader & then SmallFilesToSequenceFileConverter to gather all the feature vectors i.e. key-value pairs of all the image files together(page 206-209)? But here the values will be the bytearray of the image file. I am using feature extraction method & storing the string-vector as value. So how can I integrate this to store feature vector as value?
@Chris...Please check out stackoverflow.com/questions/10885039/… for the code I have written according to your suggestions.
0

If you want to send all the images as input to the MR task, just pass the directory containing them to FileInputFormat.setInputPaths(). If you want to send only selected images from a particular folder, you can pass multiple paths in that same call.

One way is to create a Path[] with one entry per image, or to pass a single comma-separated string containing all the paths. Go through the following documentation:

http://hadoop.apache.org/docs/current/api/org/apache/hadoop/mapred/FileInputFormat.html

One more thing: set the map input types to Text and a byte array (BytesWritable), and extract the image features from that byte array instead of creating a new FileInputStream.

Comments

Your Answer

By clicking “Post Your Answer”, you agree to our terms of service and acknowledge you have read our privacy policy.

Start asking to get answers

Find the answer to your question by asking.

Ask question

Explore related questions

See similar questions with these tags.