What are the secret ingredients of Pinterest’s fashion recommendations? — Part 2

An effort to reimplement Pinterest’s recommender system

Overview

  • How the Complete The Look dataset is organized and processed
  • How to reimplement this recommender system
The idea of the Complete The Look task

Dataset Explanation

Example of a scene-product image pair

Fashion Recommender System

Fashion Recommender System with Complete The Look

Style Embedding

Style Embedding
class StyleEmbedding(object):
    """Wraps a frozen ImageNet ResNet50 and exposes two of its feature maps.

    The constructor stores the backbone in ``self.model`` and keeps symbolic
    handles to the ``avg_pool`` and ``conv4_block6_out`` layer outputs so that
    embedding heads can be attached to them.
    """

    def __init__(self):
        # ImageNet-pretrained backbone; its weights are not trained further.
        backbone = keras.applications.ResNet50(
            weights='imagenet',
            input_shape=(224, 224, 3),
        )
        backbone.trainable = False
        self.model = backbone

        # Grid size per side used when a scene image is cut into regions.
        self._num_crop = 4

        # Symbolic outputs of the two backbone layers the heads build on.
        self.conv4_6 = backbone.get_layer('conv4_block6_out').output
        self.avg_pool = backbone.get_layer('avg_pool').output
class StyleEmbedding(object):
    """Embedding-head builders of ``StyleEmbedding``.

    This snippet continues the class; ``__init__`` (which sets ``self.model``,
    ``self.avg_pool`` and ``self.conv4_6``) is elided here.
    """
    # code

    def _embedding_head(self, features, hidden_units):
        """Attach the shared projection head to ``features``.

        The head is Dense(hidden_units) -> BatchNorm -> ReLU -> Dropout(0.1)
        -> Dense(128) -> L2 normalisation over the last axis.

        Args:
            features: symbolic tensor the head is built on.
            hidden_units: width of the first dense layer.

        Returns:
            The symbolic, L2-normalised 128-unit output tensor.
        """
        x = keras.layers.Dense(units=hidden_units)(features)
        x = keras.layers.BatchNormalization()(x)
        x = keras.layers.Activation('relu')(x)
        x = keras.layers.Dropout(rate=0.1)(x)
        x = keras.layers.Dense(units=128)(x)
        return keras.layers.Lambda(
            lambda t: tf.math.l2_normalize(t, axis=-1)
        )(x)

    def build_g_model(self):
        """Return the model named ``g_model`` built on the ``avg_pool`` output."""
        outputs = self._embedding_head(self.avg_pool, hidden_units=512)
        return keras.Model(
            inputs=self.model.inputs, outputs=outputs, name='g_model'
        )

    def build_l_model(self):
        """Return ``local_model_1``: flattened ``conv4_6`` with a 256-unit head."""
        flat = keras.layers.Flatten()(self.conv4_6)
        outputs = self._embedding_head(flat, hidden_units=256)
        return keras.Model(
            inputs=self.model.inputs, outputs=outputs, name='local_model_1'
        )

    def build_lh_model(self):
        """Return ``local_model_2``: flattened ``conv4_6`` with a 128-unit head."""
        flat = keras.layers.Flatten()(self.conv4_6)
        outputs = self._embedding_head(flat, hidden_units=128)
        return keras.Model(
            inputs=self.model.inputs, outputs=outputs, name='local_model_2'
        )

Global Distance

class GlobalDistanceLayer(keras.layers.Layer):
    """Squared Euclidean distance between two tensors over the last axis.

    ``call`` expects ``inputs`` to be a pair ``[a, b]`` and returns
    ``sum((a - b) ** 2, axis=-1)``.
    """

    def __init__(self, **kwargs):
        # Forward standard layer options (name, dtype, ...) so the layer can
        # be rebuilt from its config when a model is reloaded.
        super().__init__(**kwargs)

    def call(self, inputs):
        difference = inputs[0] - inputs[1]
        # Sum of squares instead of square(norm(...)): the value is the same,
        # but the norm's derivative is 0/0 when the inputs coincide, which
        # would turn the gradient into NaN.
        return tf.math.reduce_sum(tf.math.square(difference), axis=-1)

Local Distance

class CroppingLayer(keras.layers.Layer):
    """Crop a fixed bounding box out of an image batch and resize the crop.

    Args:
        offset_height: vertical coordinate of the top-left corner of the box.
        offset_width: horizontal coordinate of the top-left corner of the box.
        target_height: height of the box.
        target_width: width of the box.
        size: ``(height, width)`` the crop is resized to.
        **kwargs: standard ``keras.layers.Layer`` options such as ``name``.
    """

    def __init__(self, offset_height, offset_width, target_height,
                 target_width, size=(224, 224), **kwargs):
        super().__init__(**kwargs)
        # The layer has nothing to learn.
        self.trainable = False
        self._offset_height = offset_height
        self._offset_width = offset_width
        self._target_height = target_height
        self._target_width = target_width
        # Private copy so a caller-owned list is never shared or mutated.
        self._size = list(size)

    def call(self, inputs):
        cropped = tf.image.crop_to_bounding_box(
            inputs,
            offset_height=self._offset_height,
            offset_width=self._offset_width,
            target_height=self._target_height,
            # Was mistakenly self._target_height, which made every
            # non-square box come out with the wrong width.
            target_width=self._target_width,
        )
        return tf.image.resize(cropped, size=self._size)

    def get_config(self):
        """Return the constructor arguments so the layer can be re-created."""
        config = super().get_config()
        config.update({
            'offset_height': self._offset_height,
            'offset_width': self._offset_width,
            'target_height': self._target_height,
            'target_width': self._target_width,
            'size': list(self._size),
        })
        return config
class AttentionLayer(keras.layers.Layer):
    """Softmax weights derived from squared distances to a reference tensor.

    ``call`` expects ``inputs = [stacked, reference]``. ``reference`` gets a
    new leading axis so it broadcasts against ``stacked``; the squared
    Euclidean distance is taken over the last axis and the softmax is taken
    over axis 0.
    """

    def __init__(self, **kwargs):
        # Forward standard layer options (name, dtype, ...) so the layer can
        # be rebuilt from its config when a model is reloaded.
        super().__init__(**kwargs)

    def call(self, inputs):
        difference = inputs[0] - inputs[1][tf.newaxis]
        # Sum of squares instead of square(euclidean_norm(...)): same value,
        # but no NaN gradient when an entry equals the reference.
        squared_distance = tf.math.reduce_sum(
            tf.math.square(difference), axis=-1
        )
        # NOTE(review): as written, entries that are *farther* from the
        # reference receive *larger* weights — confirm this sign is intended.
        return tf.nn.softmax(squared_distance, axis=0)

class LocalDistanceLayer(keras.layers.Layer):
    """Weighted sum of squared distances between stacked tensors and a reference.

    ``call`` expects ``inputs = [stacked, reference, weights]``. The squared
    Euclidean distance between every entry of ``stacked`` (along axis 0) and
    ``reference`` is multiplied by ``weights`` and summed over axis 0.
    """

    def __init__(self, **kwargs):
        # Forward standard layer options (name, dtype, ...) so the layer can
        # be rebuilt from its config when a model is reloaded.
        super().__init__(**kwargs)

    def call(self, inputs):
        difference = inputs[0] - inputs[1][tf.newaxis]
        # Sum of squares instead of square(norm(...)): same value, but no NaN
        # gradient when an entry equals the reference.
        squared_distance = tf.math.reduce_sum(
            tf.math.square(difference), axis=-1
        )
        weighted = tf.math.multiply(squared_distance, inputs[2])
        return tf.math.reduce_sum(weighted, axis=0)
Visualization of attention

Hybrid Distance

class HybridDistanceLayer(keras.layers.Layer):
    """Average the two tensors given as ``inputs[0]`` and ``inputs[1]``."""

    def __init__(self, name=None):
        super().__init__(name=name)

    def call(self, inputs):
        first_distance = inputs[0]
        second_distance = inputs[1]
        # Equal-weight mean of the two distances.
        return 0.5 * (first_distance + second_distance)
class StyleEmbedding(object):
    """Model-assembly part of ``StyleEmbedding``.

    This snippet continues the class; ``__init__`` and the ``build_*_model``
    methods it relies on are shown in the earlier snippets.
    """

    def __call__(self):
        """Assemble the three-input model that scores a triplet.

        Returns:
            A ``keras.Model`` taking ``[scene_input, positive_input,
            negative_input]`` and returning the stacked pair of hybrid
            distances ``[y_positive, y_negative]``.
        """
        scene_inputs = keras.Input((224, 224, 3), name='scene_input')
        pl_inputs = keras.Input((224, 224, 3), name='positive_input')
        mn_inputs = keras.Input((224, 224, 3), name='negative_input')

        g_model = self.build_g_model()
        lh_model = self.build_lh_model()
        l_model = self.build_l_model()

        # Embeddings from g_model for the scene and both products.
        scene_embedding = g_model(scene_inputs)
        positive_embedding = g_model(pl_inputs)
        negative_embedding = g_model(mn_inputs)
        # Reference embedding for attention, taken from the positive product.
        attention_reference = lh_model(pl_inputs)

        # Cut the scene into a _num_crop x _num_crop grid, row by row.
        grid = self._num_crop
        cell = 224 // grid
        regions = [
            CroppingLayer(
                offset_height=row * cell,
                offset_width=col * cell,
                target_height=cell,
                target_width=cell,
            )(scene_inputs)
            for row in range(grid)
            for col in range(grid)
        ]

        # Embed every region with both local heads.
        region_embeddings = []
        region_attention_embeddings = []
        for region in regions[:grid * grid]:
            region_embeddings.append(l_model(region))
            region_attention_embeddings.append(lh_model(region))
        region_embeddings = tf.stack(region_embeddings)
        region_attention_embeddings = tf.stack(region_attention_embeddings)

        # The same attention weights are used for both products.
        attention = AttentionLayer()(
            [region_attention_embeddings, attention_reference]
        )
        positive_local = LocalDistanceLayer()(
            [region_embeddings, positive_embedding, attention]
        )
        negative_local = LocalDistanceLayer()(
            [region_embeddings, negative_embedding, attention]
        )
        positive_global = GlobalDistanceLayer()(
            [scene_embedding, positive_embedding]
        )
        negative_global = GlobalDistanceLayer()(
            [scene_embedding, negative_embedding]
        )

        positive_distance = HybridDistanceLayer(name='y_positive')(
            [positive_global, positive_local]
        )
        negative_distance = HybridDistanceLayer(name='y_negative')(
            [negative_global, negative_local]
        )
        # Index 0 holds the positive distance, index 1 the negative one.
        outputs = tf.stack([positive_distance, negative_distance])

        return keras.Model(
            inputs=[scene_inputs, pl_inputs, mn_inputs],
            outputs=outputs,
        )

Triplet loss

@tf.function
def compat_loss(y_true, y_pred):
    """Hinge loss on the gap between ``y_pred[0]`` and ``y_pred[1]``.

    Computes ``sum(max(y_pred[0] - y_pred[1] + 0.2, 0))``. ``y_true`` is
    accepted for the Keras loss signature but is not read.
    """
    first_distance = y_pred[0]
    second_distance = y_pred[1]
    # 0.2 is the margin by which the second distance must exceed the first.
    hinge = tf.math.maximum(first_distance - second_distance + 0.2, 0.0)
    return tf.math.reduce_sum(hinge)

Product Ranking

index = 76  # image index in the test set

# Query triplet and its category label.
# NOTE(review): `data` is defined outside this snippet; presumably it maps
# field names to indexable tensors — confirm.
scene = data['scene'][index].numpy()
positive = data['positive'][index].numpy()
negative = data['negative'][index].numpy()
category_label = data['category'][index]
scene_input = keras.applications.resnet.preprocess_input(scene.reshape((1, 224, 224, 3)))
positive_input = keras.applications.resnet.preprocess_input(positive.reshape((1, 224, 224, 3)))
negative_input = keras.applications.resnet.preprocess_input(negative.reshape((1, 224, 224, 3)))

# Convert once; the original rebuilt this array on every loop iteration.
positive_signs = np.array(data['positive_sign'])

# Distinct product signatures that occur in the query's category.
sign_idx = np.where(np.array(data['category']) == category_label)[0]
sign = np.unique(positive_signs[sign_idx])

# For each signature, the first row of the whole dataset that carries it.
product_idx = np.array(
    [np.where(positive_signs == s)[0][0] for s in sign]
)
products = np.array(data['positive'])[product_idx]
product_inputs = keras.applications.resnet.preprocess_input(products)

# Pair every candidate product with the same scene and the same negative.
scene_inputs = np.array(tf.repeat(scene_input, repeats=len(product_idx), axis=0))
negative_inputs = np.array(tf.repeat(negative_input, repeats=len(product_idx), axis=0))
pred = distance_model.predict([scene_inputs, product_inputs, negative_inputs], batch_size=1)

# np.argsort sorts along the last axis in ascending order.
# NOTE(review): this ranks candidates only if `pred` is 1-D with one
# distance per candidate — confirm the output shape of `distance_model`.
top_idx = np.argsort(pred)
The scene image and the ground truth
The recommended products

Conclusion

--

--

Neurond AI is a transformation business. https://www.neurond.com/

Get the Medium app

A button that says 'Download on the App Store', and if clicked it will lead you to the iOS App store
A button that says 'Get it on, Google Play', and if clicked it will lead you to the Google Play store