def multi_target_attention(q, k, v, m, seq_name, num_heads=4):
    """Multi-head attention of one target query over a masked sequence.

    Shape notes follow the original header comments (B = batch size,
    T = sequence length, H = hidden size).

    Args:
        q: Query tensor, [B, H].
        k: Key tensor, [B, T, H].
        v: Value tensor. Presumably [B, T, H] like ``k`` (it is split on
            axis 2 and multiplied by the [.., 1, T] attention weights) --
            TODO confirm against callers.
        m: Mask, [B, T]. Positions equal to 1 are attended to; all other
            positions are replaced by a large negative logit before softmax.
        seq_name: Not used inside this function; kept so existing call
            sites keep working.
        num_heads: Number of heads. The last axis of ``q``, ``k`` and ``v``
            is split into this many parts with ``tf.split``.

    Returns:
        Tensor with the head outputs concatenated back on the last axis and
        the length-1 query axis squeezed away, i.e. [B, last_dim_of_v].
    """
    # [B, H] -> [B, 1, H]: treat the query as a length-1 sequence.
    q = tf.expand_dims(q, axis=1)

    # Fold the heads into the batch axis:
    # [B, *, H] -> [num_heads * B, *, H / num_heads].
    q = tf.concat(tf.split(q, num_heads, axis=2), axis=0)
    k = tf.concat(tf.split(k, num_heads, axis=2), axis=0)
    v = tf.concat(tf.split(v, num_heads, axis=2), axis=0)

    # `layer_normal` is a helper defined outside this block.
    q = layer_normal(q)
    k = layer_normal(k)

    # Attention logits, [num_heads * B, 1, T].
    outputs = tf.matmul(q, k, transpose_b=True)

    # NOTE(review): axis 1 of `k` is the sequence length T under the shapes
    # documented above, so the logits are scaled by T ** -0.5 rather than by
    # the per-head depth used in standard scaled dot-product attention.
    # Left unchanged on purpose; confirm whether this is intended.
    k_length = tf.shape(k)[1]
    outputs = outputs * tf.pow(tf.cast(k_length, outputs.dtype), -0.5)

    # [B, T] -> [B, 1, T] -> [num_heads * B, 1, T], then to a boolean mask
    # that is True where the original mask equals 1.
    m = tf.expand_dims(m, axis=1)
    masks = tf.tile(m, [num_heads, 1, 1])
    masks = tf.equal(masks, tf.ones_like(masks))

    # Large negative logit for masked positions. The historical constant
    # (-2 ** 32 + 1) is not representable in float16, so clamp it to the
    # most negative finite value of the working dtype. For float32/float64/
    # bfloat16 the clamp is a no-op and the original constant is used.
    pad_value = max(-2 ** 32 + 1, outputs.dtype.min)
    paddings = tf.fill(
        tf.shape(outputs), tf.constant(pad_value, dtype=outputs.dtype)
    )
    outputs = tf.where(masks, outputs, paddings)

    # A row whose positions are all masked holds identical logits, so the
    # softmax yields uniform weights for it.
    outputs = tf.nn.softmax(outputs)

    # Weighted sum of the values, [num_heads * B, 1, last_dim_of_v / num_heads].
    outputs = tf.matmul(outputs, v)

    # Undo the head folding and drop the length-1 query axis.
    outputs = tf.concat(tf.split(outputs, num_heads, axis=0), axis=2)
    outputs = tf.squeeze(outputs, axis=1)
    return outputs
multi_target_attention
TensorFlow相关文章
最近热门
- tf.feature_column.weighted_categorical_column
- LLM模型参数量计算
- 论文 | TWIN V2: Scaling Ultra-Long User Behavior Sequence Modeling for Enhanced CTR Prediction at Kuaishou
- tf.feature_column.shared_embedding_columns
- ITC(Image-Text Contrastive)loss和ITM(Image-Text Matching)loss
- ConvNeXt:基于卷积神经网络(CNN)的图像识别架构
- LLM4REC
- 98: Address already in use 解决方案
- linux C++ 定时器代码
- 034 Tensorflow | 多GPU编程
最常浏览
- 016 推荐系统 | 排序学习(LTR - Learning To Rank)
- 偏微分符号
- i.i.d(又称IID)
- 利普希茨连续条件(Lipschitz continuity)
- (error) MOVED 原因和解决方案
- TextCNN详解
- 找不到com.google.protobuf.GeneratedMessageV3的类文件
- Deployment failed: repository element was not specified in the POM inside distributionManagement
- cannot access com.google.protobuf.GeneratedMessageV3 解决方案
- CLUSTERDOWN Hash slot not served 问题原因和解决办法
×