Modified Triplet Loss : Ungraded Lecture Notebook¶

In this notebook you'll see how to calculate the full triplet loss, step by step, including the mean negative and the closest negative. You'll also calculate the matrix of similarity scores.

Background¶

The original triplet loss function looks like this:

$\mathcal{L_\mathrm{Original}} = \max{(\mathrm{s}(A,N) -\mathrm{s}(A,P) +\alpha, 0)},$

where the inputs are the Anchor $A$, Positive $P$ and Negative $N$.

As you learned in the lectures, this loss can be improved by including the mean negative and the closest negative terms, to create a new full loss function.

$\mathcal{L_\mathrm{1}} = \max{(mean\_neg -\mathrm{s}(A,P) +\alpha, 0)}$

$\mathcal{L_\mathrm{2}} = \max{(closest\_neg -\mathrm{s}(A,P) +\alpha, 0)}$

$\mathcal{L_\mathrm{Full}} = \mathcal{L_\mathrm{1}} + \mathcal{L_\mathrm{2}}$

Let me show you what that means exactly, and how to calculate each step.
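
Before walking through the notebook cells, here is a tiny worked example with made-up similarity values (they are not taken from the data below), just to show the arithmetic behind $\mathcal{L_\mathrm{1}}$, $\mathcal{L_\mathrm{2}}$ and $\mathcal{L_\mathrm{Full}}$:

# Hypothetical similarity values for a single anchor (illustration only)
sim_ap = 0.50       # s(A,P) : similarity to the positive
mean_neg = 0.40     # average similarity to the negatives
closest_neg = 0.45  # hardest negative that is still below s(A,P)
alpha = 0.25        # margin

l_1 = max(mean_neg - sim_ap + alpha, 0)     # max(0.40 - 0.50 + 0.25, 0) ≈ 0.15
l_2 = max(closest_neg - sim_ap + alpha, 0)  # max(0.45 - 0.50 + 0.25, 0) ≈ 0.20
l_full = l_1 + l_2                          # ≈ 0.35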

Imports¶

In [1]:
import numpy as np
import tensorflow as tf

Similarity Scores¶

The first step is to calculate the matrix of similarity scores using cosine similarity so that you can look up $\mathrm{s}(A,P)$, $\mathrm{s}(A,N)$ as needed for the loss formulas.

Two Vectors¶

First you will calculate the similarity score for 2 vectors using cosine similarity.

$\mathrm{s}(v_1,v_2) = \mathrm{cosine \ similarity}(v_1,v_2) = \frac{v_1 \cdot v_2}{||v_1||~||v_2||}$

  • Try changing the values in the second vector to see how it changes the cosine similarity.
In [2]:
# Two vector example
# Input data

v1 = np.array([1, 2, 3], dtype=float)
v2 = np.array([1, 2, 3.5], dtype=float)  # notice the 3rd element is offset by 0.5

### START CODE HERE ###
# Try modifying the vector v2 to see how it impacts the cosine similarity
# v2 = v1                   # identical vector
# v2 = v1 * -1              # opposite vector
# v2 = np.array([0,-42,1], dtype=float)  # random example
### END CODE HERE ###

print("-- Inputs --")
print("v1 :", v1)
print("v2 :", v2, "\n")

# Similarity score
def cosine_similarity(v1, v2):
    numerator = tf.math.reduce_sum(v1*v2) # takes the dot product between v1 and v2. Equivalent to np.dot(v1, v2)
    denominator = tf.math.sqrt(tf.math.reduce_sum(v1*v1) * tf.math.reduce_sum(v2*v2))
    return numerator / denominator

print("-- Outputs --")
print("cosine similarity :", cosine_similarity(v1, v2).numpy())
-- Inputs --
v1 : [1. 2. 3.]
v2 : [1.  2.  3.5] 

-- Outputs --
cosine similarity : 0.9974086507360697

Observe that the code explicitly divides by $\|v_1\|\,\|v_2\|$ (computed as $\sqrt{(v_1 \cdot v_1)(v_2 \cdot v_2)}$) to get the cosine similarity. However, the output of the Siamese network as you have seen it so far includes a normalizing layer, so that $\|v_1\| = \|v_2\| = 1$ and the cosine similarity reduces to a plain dot product.
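
For instance, here is a minimal check, reusing v1, v2 and cosine_similarity from the cell above: once both vectors are L2-normalized, the plain dot product already equals the cosine similarity, which is why a normalizing layer lets you skip the division.

# Minimal check : after L2-normalizing, the dot product equals the cosine similarity
v1_norm = v1 / np.linalg.norm(v1)
v2_norm = v2 / np.linalg.norm(v2)
print("dot product of normalized vectors :", np.dot(v1_norm, v2_norm))
print("matches cosine_similarity(v1, v2) :",
      np.isclose(np.dot(v1_norm, v2_norm), cosine_similarity(v1, v2).numpy()))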

Two Batches of Vectors¶

Now you will see how to calculate the similarity scores using cosine similarity for two batches of vectors. These are rows of individual vectors, just like in the example above, but stacked vertically into matrices. For a batch size (row count) of 4 and an embedding size (column count) of 5, they would look like the image below.

The data is set up so that $v_{1\_1}$ and $v_{2\_1}$ represent duplicate inputs, but neither is a duplicate of any other row in the batch. This means $v_{1\_1}$ and $v_{2\_1}$ (green and green in the image) are more similar to each other than, say, $v_{1\_1}$ and $v_{2\_2}$ (green and magenta).

You will see two different methods for calculating the matrix of similarities from 2 batches of vectors.

(Image: the two batches $v_1$ and $v_2$ shown as stacked matrices with 4 rows and 5 columns each; corresponding rows are color-coded to mark the approximate-duplicate pairs.)

First you will create the similarity matrix for batches $v_1$ and $v_2$, filling in the matrix one element at a time. This involves two nested for loops, which isn't very efficient, but it is instructive: you get to see how each element of the similarity matrix is created.

In [3]:
# Two batches of vectors example
# Input data

v1_1 = np.array([1.0, 2.0, 3.0])
v1_2 = np.array([9.0, 8.0, 7.0])
v1_3 = np.array([-1.0, -4.0, -2.0])
v1_4 = np.array([1.0, -7.0, 2.0])
v1 = np.vstack([v1_1, v1_2, v1_3, v1_4])

v2_1 = v1_1 + np.random.normal(0, 2, 3)  # add some noise to create approximate duplicate
v2_2 = v1_2 + np.random.normal(0, 2, 3)
v2_3 = v1_3 + np.random.normal(0, 2, 3)
v2_4 = v1_4 + np.random.normal(0, 2, 3)
v2 = np.vstack([v2_1, v2_2, v2_3, v2_4])

print("-- Inputs --")
print(f"v1 :\n{v1}\n")
print(f"v2 :\n{v2}\n")

# Batch sizes must match
b = len(v1)
print(f"Batch sizes match : {b == len(v2)}\n")

# Similarity scores

# Option 1 : nested loops and the cosine similarity function
sim_1 = np.zeros([b, b])  # empty array to take similarity scores
# Loop
for row in range(0, sim_1.shape[0]):
    for col in range(0, sim_1.shape[1]):
        sim_1[row, col] = cosine_similarity(v2[row], v1[col]).numpy()

print("-- Outputs --")
print("Option 1 : loop")
print(sim_1)
-- Inputs --
v1 :
[[ 1.  2.  3.]
 [ 9.  8.  7.]
 [-1. -4. -2.]
 [ 1. -7.  2.]]

v2 :
[[ 0.37691176  4.0246877   6.2071861 ]
 [ 9.87477382  7.88132234  6.21902174]
 [-2.85783888 -0.23176011 -1.72727114]
 [ 1.24252109 -7.384875    2.69238464]]

Batch sizes match : True

-- Outputs --
Option 1 : loop
[[ 0.97589663  0.76609263 -0.85108626 -0.28257765]
 [ 0.84066194  0.99651869 -0.8342878  -0.31751144]
 [-0.67892571 -0.85078077  0.47195382 -0.19067196]
 [-0.18303602 -0.26208228  0.62828316  0.99730792]]

Now you can repeat the procedure using vectorization, so the computations are more efficient. For this small example you will not notice a difference, but when training a large model it is crucial (a rough timing sketch follows the vectorized version below).

In [4]:
# Option 2 : vector normalization and dot product
def norm(x):
    return tf.math.l2_normalize(x, axis=1) # use tensorflow built in normalization

sim_2 = tf.linalg.matmul(norm(v2), norm(v1), transpose_b=True)

print("-- Outputs --")
print("Option 2 : vector normalization and dot product")
print(sim_2, "\n")

# Check
print(f"Outputs are the same : {np.allclose(sim_1, sim_2)}")
-- Outputs --
Option 2 : vector normalization and dot product
tf.Tensor(
[[ 0.97589663  0.76609263 -0.85108626 -0.28257765]
 [ 0.84066194  0.99651869 -0.8342878  -0.31751144]
 [-0.67892571 -0.85078077  0.47195382 -0.19067196]
 [-0.18303602 -0.26208228  0.62828316  0.99730792]], shape=(4, 4), dtype=float64) 

Outputs are the same : True
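
If you want to see the efficiency difference mentioned above, here is a rough (and deliberately unscientific) timing sketch on a larger random batch; the batch and embedding sizes are arbitrary choices for illustration.

# Optional timing sketch : loop vs vectorized on a larger random batch
import time

big_b, emb = 64, 128  # arbitrary sizes, just large enough to show a gap
m1 = np.random.normal(size=(big_b, emb))
m2 = np.random.normal(size=(big_b, emb))

t0 = time.perf_counter()
sim_loop = np.zeros([big_b, big_b])
for row in range(big_b):
    for col in range(big_b):
        sim_loop[row, col] = cosine_similarity(m2[row], m1[col]).numpy()
t1 = time.perf_counter()

sim_vec = tf.linalg.matmul(norm(m2), norm(m1), transpose_b=True)
t2 = time.perf_counter()

print(f"loop       : {t1 - t0:.3f} s")
print(f"vectorized : {t2 - t1:.3f} s")
print("same result:", np.allclose(sim_loop, sim_vec.numpy()))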

Hard Negative Mining¶

You will now calculate the mean negative $mean\_neg$ and the closest negative $closest\_neg$ used in calculating $\mathcal{L_\mathrm{1}}$ and $\mathcal{L_\mathrm{2}}$.

$\mathcal{L_\mathrm{1}} = \max{(mean\_neg -\mathrm{s}(A,P) +\alpha, 0)}$

$\mathcal{L_\mathrm{2}} = \max{(closest\_neg -\mathrm{s}(A,P) +\alpha, 0)}$

You'll do this using the matrix of similarity scores you already know how to make, like the example below for a batch size of 4. The diagonal of the matrix contains all the $\mathrm{s}(A,P)$ values, similarities from duplicate question pairs (aka Positives). This is an important attribute for the calculations to follow.

(Image: a 4 × 4 similarity matrix; the diagonal holds the $\mathrm{s}(A,P)$ values for the duplicate pairs, and the off-diagonal entries hold the $\mathrm{s}(A,N)$ values.)

Mean Negative¶

$mean\_neg$ is the average of the off-diagonal $\mathrm{s}(A,N)$ values for each row.

Closest Negative¶

$closest\_neg$ is the largest off-diagonal value $\mathrm{s}(A,N)$ that does not exceed the diagonal $\mathrm{s}(A,P)$, for each row.

  • Try using a different matrix of similarity scores.

First, try the implementation in NumPy.

In [5]:
# Hardcoded matrix of similarity scores
sim_hardcoded = np.array(
    [
        [0.9, -0.8, 0.3, -0.5],
        [-0.4, 0.5, 0.1, -0.1],
        [0.3, 0.1, -0.4, -0.8],
        [-0.5, -0.2, -0.7, 0.5],
    ]
)

sim = sim_hardcoded

### START CODE HERE ###
# Try using different values for the matrix of similarity scores
# sim = 2 * np.random.random_sample((b,b)) -1   # random similarity scores between -1 and 1
# sim = sim_2                                   # the matrix calculated previously using vector normalization and dot product
### END CODE HERE ###

# Batch size
b = sim.shape[0]

print("-- Inputs --")
print(f"sim:")
print(sim)
print(f"shape: {sim.shape}\n")

# Positives
# All the s(A,P) values : similarities from duplicate question pairs (aka Positives)
# These are along the diagonal
sim_ap = np.diag(sim)  # this is just a 1D array of the diagonal elements
print("sim_ap:")
print(np.diag(sim_ap))  # np.diag of a 1D array builds the diagonal matrix shown below


# Negatives
# All the s(A,N) values : similarities from the non-duplicate question pairs (aka Negatives)
# These are in the off diagonals
sim_an = sim - np.diag(sim_ap)
print("\nsim_an:")
print(sim_an)

print("\n-- Outputs --")
# Mean negative
# Average of the s(A,N) values for each row
mean_neg = np.sum(sim_an, axis=1, keepdims=True) / (b - 1)
print("\nmean_neg:")
print(mean_neg)

# Closest negative
# Max s(A,N) that is <= s(A,P) for each row
mask_1 = np.identity(b) == 1            # mask to exclude the diagonal
mask_2 = sim_an > sim_ap.reshape(b, 1)  # mask to exclude sim_an > sim_ap
mask = mask_1 | mask_2
sim_an_masked = np.copy(sim_an)         # create a copy to preserve sim_an
sim_an_masked[mask] = -2

closest_neg = np.max(sim_an_masked, axis=1, keepdims=True)
print("\nclosest_neg :")
print(closest_neg)
-- Inputs --
sim:
[[ 0.9 -0.8  0.3 -0.5]
 [-0.4  0.5  0.1 -0.1]
 [ 0.3  0.1 -0.4 -0.8]
 [-0.5 -0.2 -0.7  0.5]]
shape: (4, 4)

sim_ap:
[[ 0.9  0.   0.   0. ]
 [ 0.   0.5  0.   0. ]
 [ 0.   0.  -0.4  0. ]
 [ 0.   0.   0.   0.5]]

sim_an:
[[ 0.  -0.8  0.3 -0.5]
 [-0.4  0.   0.1 -0.1]
 [ 0.3  0.1  0.  -0.8]
 [-0.5 -0.2 -0.7  0. ]]

-- Outputs --

mean_neg:
[[-0.33333333]
 [-0.13333333]
 [-0.13333333]
 [-0.46666667]]

closest_neg :
[[ 0.3]
 [ 0.1]
 [-0.8]
 [-0.2]]
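
As a quick sanity check, you can reproduce the first row of these outputs by hand, using the hardcoded sim and b from the cell above. The off-diagonal values in row 0 are -0.8, 0.3 and -0.5, so the mean negative is (-0.8 + 0.3 - 0.5) / 3 = -0.333..., and the closest negative is 0.3, the largest off-diagonal value that does not exceed s(A,P) = 0.9.

# Sanity check for row 0 of mean_neg and closest_neg
row0_neg = [sim[0, j] for j in range(b) if j != 0]  # off-diagonal values of row 0
print("mean of row 0 negatives    :", np.mean(row0_neg))                           # -0.333...
print("closest negative for row 0 :", max(x for x in row0_neg if x <= sim[0, 0]))  # 0.3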

Now have a look at the implementation in TensorFlow.

In [6]:
# Hardcoded matrix of similarity scores
sim_hardcoded = np.array(
    [
        [0.9, -0.8, 0.3, -0.5],
        [-0.4, 0.5, 0.1, -0.1],
        [0.3, 0.1, -0.4, -0.8],
        [-0.5, -0.2, -0.7, 0.5],
    ]
)

sim = sim_hardcoded

### START CODE HERE ###
# Try using different values for the matrix of similarity scores
# sim = 2 * np.random.random_sample((b,b)) -1   # random similarity scores between -1 and 1
# sim = sim_2                                   # the matrix calculated previously using vector normalization and dot product
### END CODE HERE ###

# Batch size
b = sim.shape[0]

print("-- Inputs --")
print("sim :")
print(sim)
print("shape :", sim.shape, "\n")

# Positives
# All the s(A,P) values : similarities from duplicate question pairs (aka Positives)
# These are along the diagonal
sim_ap = tf.linalg.diag_part(sim) # this is just a 1D array of diagonal elements
print("sim_ap :")
# tf.linalg.diag makes a diagonal matrix given an array
print(tf.linalg.diag(sim_ap), "\n")

# Negatives
# All the s(A,N) values : similarities from the non-duplicate question pairs (aka Negatives)
# These are in the off diagonals
sim_an = sim - tf.linalg.diag(sim_ap)
print("sim_an :")
print(sim_an, "\n")

print("-- Outputs --")
# Mean negative
# Average of the s(A,N) values for each row
mean_neg = tf.math.reduce_sum(sim_an, axis=1) / (b - 1)
print("mean_neg :")
print(mean_neg, "\n")

# Closest negative
# Max s(A,N) that is <= s(A,P) for each row
mask_1 = tf.eye(b) == 1            # mask to exclude the diagonal
mask_2 = sim_an > tf.expand_dims(sim_ap, 1)  # mask to exclude sim_an > sim_ap
mask = tf.cast(mask_1 | mask_2, tf.float64)
sim_an_masked = sim_an - 2.0*mask

closest_neg = tf.math.reduce_max(sim_an_masked, axis=1)
print("closest_neg :")
print(closest_neg, "\n")
-- Inputs --
sim :
[[ 0.9 -0.8  0.3 -0.5]
 [-0.4  0.5  0.1 -0.1]
 [ 0.3  0.1 -0.4 -0.8]
 [-0.5 -0.2 -0.7  0.5]]
shape : (4, 4) 

sim_ap :
tf.Tensor(
[[ 0.9  0.   0.   0. ]
 [ 0.   0.5  0.   0. ]
 [ 0.   0.  -0.4  0. ]
 [ 0.   0.   0.   0.5]], shape=(4, 4), dtype=float64) 

sim_an :
tf.Tensor(
[[ 0.  -0.8  0.3 -0.5]
 [-0.4  0.   0.1 -0.1]
 [ 0.3  0.1  0.  -0.8]
 [-0.5 -0.2 -0.7  0. ]], shape=(4, 4), dtype=float64) 

-- Outputs --
mean_neg :
tf.Tensor([-0.33333333 -0.13333333 -0.13333333 -0.46666667], shape=(4,), dtype=float64) 

closest_neg :
tf.Tensor([ 0.3  0.1 -0.8 -0.2], shape=(4,), dtype=float64) 

The Loss Functions¶

The last step is to calculate the loss functions.

$\mathcal{L_\mathrm{1}} = \max{(mean\_neg -\mathrm{s}(A,P) +\alpha, 0)}$

$\mathcal{L_\mathrm{2}} = \max{(closest\_neg -\mathrm{s}(A,P) +\alpha, 0)}$

$\mathcal{L_\mathrm{Full}} = \mathcal{L_\mathrm{1}} + \mathcal{L_\mathrm{2}}$

In [7]:
# Alpha margin
alpha = 0.25

# Modified triplet loss
# Loss 1
l_1 = tf.maximum(mean_neg - sim_ap + alpha, 0)
print(f"Loss 1: {l_1}\n")
# Loss 2
l_2 = tf.maximum(closest_neg - sim_ap + alpha, 0)
print(f"Loss 2: {l_2}\n")
# Loss full
l_full = l_1 + l_2
# Cost
cost = tf.math.reduce_sum(l_full)

print("-- Outputs --")
print("Loss full :")
print(l_full, "\n")
print("Cost :", "{:.3f}".format(cost))
Loss 1: [0.         0.         0.51666667 0.        ]

Loss 2: [0. 0. 0. 0.]

-- Outputs --
Loss full :
tf.Tensor([0.         0.         0.51666667 0.        ], shape=(4,), dtype=float64) 

Cost : 0.517
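
To tie the pieces together, here is a compact sketch (not the graded assignment's implementation) that bundles all the steps into a single function. It reuses the norm helper and the batches v1 and v2 from earlier, and assumes the same convention that matching rows of the two batches are duplicates.

# Sketch : the full modified triplet loss in one function (illustration only)
def triplet_loss_full(v1, v2, alpha=0.25):
    b = v1.shape[0]
    sim = tf.linalg.matmul(norm(v2), norm(v1), transpose_b=True)  # similarity matrix
    sim_ap = tf.linalg.diag_part(sim)                             # s(A,P) on the diagonal
    sim_an = sim - tf.linalg.diag(sim_ap)                         # off-diagonal s(A,N)
    mean_neg = tf.math.reduce_sum(sim_an, axis=1) / (b - 1)
    mask = tf.cast((tf.eye(b) == 1) | (sim_an > tf.expand_dims(sim_ap, 1)), sim.dtype)
    closest_neg = tf.math.reduce_max(sim_an - 2.0 * mask, axis=1)
    l_1 = tf.maximum(mean_neg - sim_ap + alpha, 0)
    l_2 = tf.maximum(closest_neg - sim_ap + alpha, 0)
    return tf.math.reduce_sum(l_1 + l_2)

print("Cost on the batches from earlier :", triplet_loss_full(v1, v2).numpy())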

Summary¶

There were a lot of steps in there, so well done. You can now calculate a modified triplet loss, incorporating the mean negative and the closest negative. You also learned how to create a matrix of similarity scores based on cosine similarity.
