This example is a custom data layer that receives a text file with image paths, loads a batch of images, and preprocesses them. A quick tip: Caffe already ships with a wide range of data layers, so a custom layer is probably not the most efficient choice if you just want something simple.
My dataLayer.py could be something like:
import random

import caffe


class Custom_Data_Layer(caffe.Layer):
    """Python data layer that serves fixed-size batches of (image, label) pairs.

    The layer has no bottoms and exactly two tops: the image batch and the
    label batch. It is configured through ``param_str``, which must evaluate
    to a dict with the keys:

    * ``"src_file"``   -- path handed to ``readSrcFile``.
    * ``"batch_size"`` -- number of samples written per forward pass.
    * ``"im_shape"``   -- side length used for the data top when no crop size
      is given.
    * ``"crop_size"``  -- optional; when present and truthy it is used as the
      side length of the data top instead of ``im_shape``.
    """

    def setup(self, bottom, top):
        """Validate the blob layout, read the parameters and size the tops.

        Raises:
            Exception: if the layer is not given exactly two tops, is given
                any bottom, or the source file yields no samples.
        """
        # Check top shape
        if len(top) != 2:
            raise Exception("Need to define tops (data and label)")

        # Check bottom shape
        if len(bottom) != 0:
            raise Exception("Do not define a bottom.")

        # Read parameters.
        # NOTE(review): eval() executes arbitrary Python found in param_str.
        # That is only acceptable while the prototxt is trusted; if param_str
        # is always a plain dict literal, ast.literal_eval would be a safer
        # parser -- confirm before switching.
        params = eval(self.param_str)
        src_file = params["src_file"]
        self.batch_size = params["batch_size"]
        self.im_shape = params["im_shape"]
        self.crop_size = params.get("crop_size", False)

        # Reshape tops: the data blob is (batch, 3, side, side), where side is
        # crop_size when one was supplied and im_shape otherwise.
        side = self.crop_size if self.crop_size else self.im_shape
        top[0].reshape(self.batch_size, 3, side, side)
        top[1].reshape(self.batch_size)

        # Read source file.
        # readSrcFile is assumed to exist elsewhere: it reads the source file
        # and returns a list of tuples in the form of (img, label).
        self.imgTuples = readSrcFile(src_file)
        if len(self.imgTuples) == 0:
            # Fail here with a clear message instead of an IndexError on the
            # first forward pass.
            raise Exception("Source file contains no images: %s" % src_file)

        # Index of the next sample to serve; used to detect the end of an epoch.
        self._cur = 0

    def forward(self, bottom, top):
        """Fill both top blobs with the next ``batch_size`` samples."""
        for itt in range(self.batch_size):
            # Fetch the next sample (this call used to reference a method name
            # that does not exist on the class).
            im, label = self.load_next_img()

            # Here we could preprocess the image
            # ... (e.g. cropping to crop_size, since the data top was sized
            # with crop_size in setup when it is set)

            # Add directly to the top blobs
            top[0].data[itt, ...] = im
            top[1].data[itt, ...] = label

    def load_next_img(self):
        """Return the next (img, label) tuple, reshuffling at each epoch end."""
        # If we have finished forwarding all images, then an epoch has finished
        # and it is time to start a new one.
        if self._cur == len(self.imgTuples):
            self._cur = 0
            random.shuffle(self.imgTuples)

        im, label = self.imgTuples[self._cur]
        self._cur += 1
        return im, label

    def reshape(self, bottom, top):
        """
        There is no need to reshape the data, since the input is of fixed size
        (img shape and batch size)
        """
        pass

    def backward(self, bottom, top):
        """
        This layer does not back propagate
        """
        pass
And the prototxt would look like this:
# Python data layer definition: no bottoms, two tops (image batch and labels).
layer {
name: "Data"
type: "Python"
# The Python layer's setup() requires exactly two tops: data and label.
top: "data"
top: "label"
python_param {
# Module name of the Python file that defines the layer (dataLayer.py).
module: "dataLayer"
# Name of the layer class inside that module.
layer: "Custom_Data_Layer"
# Dict literal evaluated by the layer's setup(); it reads the keys
# "batch_size", "im_shape", "src_file" and the optional "crop_size".
param_str: '{"batch_size": 126,"im_shape":256, "crop_size":224, "src_file": "path_to_TRAIN_file.txt"}'
}
}