一、基本概念 #
- 选品、选客:从庞大的商品库(视频/节目/用户数据)中按照特定的规则筛选出需要的商品(视频/节目/用户数据等)的过程
- 实体:圈选的主题,比如商品、节目、视频、用户等,不同的实体对应不同类型的选品,比如商品选品、节目选品等
- 标签:选品等最小单位,比如:性别、年龄、地域、观看时长等
- 规则:标签的各种组合方式,比如:性别=男 && 年龄在20到35之间 && (地域=北京||地域=上海)
- SCG(standard content group):标准内容组
二、效果图 #
三、具体设计 #
1、中间件介绍 #
- mysql:略
- es: Elasticsearch 底层数据结构
- Greenplum
- AnalyticDB
- redis
- HBase
2、库表设计 #
主要涉及五张表:实体表、类目表、标签表、标签值表、SCG表
- 实体表(entity)
CREATE TEBALE `entity` (
`id` int NOT NULL AUTO_INCREMENT,
`name` varchar(45) NOT NULL COMMENT '实体名称',
`ename` varchar(50) NOT NULL COMMENT '实体唯一标识',
`status` int NOT NULL DEFAULT '0' COMMENT '实体状态 0:正常 1:异常 默认0',
`create_time` varchar(45) DEFAULT NOT NULL COMMENT '创建时间',
`create_user` varchar(45) DEFAULT NOT NULL COMMENT '创建人',
`update_user` varchar(45) DEFAULT NOT NULL COMMENT '更新人',
PRIMARY KEY(`id`),
UNIQUE KEY `ename_UNIQUE` (`ename`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE-utf8mb4_0900_ai_ci
INSERT INTO `entity` (`name`, `ename`, `status`, `create_time`, `create_user`, `update_user`)
VALUES
('实体1', 'entity1', 0, '2024-06-10 12:00:00', 'creator1', 'updater1'),
('实体2', 'entity2', 0, '2024-06-10 13:00:00', 'creator2', 'updater2'),
('实体3', 'entity3', 1, '2024-06-10 14:00:00', 'creator3', 'updater3');
- 类目表(category)
CREATE TABLE `category` (
`id` int NOT NULL AUTO_INCREMENT,
`name` varchar(45) NOT NULL COMMENT '类目名称',
`status` int DEFAULT '0' COMMENT '类目状态 0:正常 1:异常',
`level` int DEFAULT '1' COMMENT '类目级别',
`parent_id` int DEFAULT '0' COMMENT '父类目ID 0代表顶级类目'
PRIMATY KEY(`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE-utf8mb4_0900_ai_ci
INSERT INTO `category` (`name`, `level`, `parent_id`)
VALUES
('基础属性', 1, 0),
('视频属性', 1, 0),
('基本特征', 2, 1),
('地域属性', 2, 1),
('近30天', 2, 2),
('近90天', 2, 2);
- 标签表(tags)
CREATE TABLE `tags` (
`id` int NOT NULL AUTO_INCREMENT,
`name` varchar(45) NOT NULL COMMENT '标签名称',
`tag_key` varchar(50) NOT NULL COMMENT '标签唯一标识',
`status` int NOT NULL DEFAULT '0' COMMENT '标签状态 0:正常 1:失效 默认0',
`create_time` varchar(45) NOT NULL COMMENT '创建时间'
`update_time` varchar(45) DEFAULT NULL COMMENT '更新时间'
`tag_type` varchar(45) DEFAULT 'STRING'COMMENT '标签类型 STRING NUMBER BOOLEAN DATE',
`is_multiple` int DEFAULT '0' COMMENT '是否支持多值 0:单值 1:单值 默认0'
`category_id` int DEFAULT '0' COMMENT '类目ID'
`control_type` varchar(45) DEFAULT NULL COMMENT '控件类型 INPUT SELECTIVE DATE NUMBE_RANGE DATE_RANGE',
PRIMARY KEY(`id`),
UNIQUE KEY `ename_UNIQUE` (`tag_key`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE-utf8mb4_0900_ai_ci
INSERT INTO `tags` (`name`, `tag_key`, `status`, `create_time`, `update_time`, `tag_type`, `is_multiple`, `category_id`, `control_type`) VALUES
('性别', 'sex', 0, '2024-06-11 10:00:00', NULL, 'STRING', 0, 6, 'SELECTIVE'),
('姓名', 'name', 0, '2024-06-11 10:05:00', NULL, 'STRING', 1, 0, 'INPUT'),
('所在城市', 'area', 0, '2024-06-11 10:10:00', NULL, 'STRING', 0, 1, 'SELECTIVE'),
('看过的剧', 'view_tv', 0, '2024-06-11 10:15:00', NULL, 'STRING', 1, 5, 'SELECTIVE');
- 标签值表(tag_values)
真实业务不会采用mysql存放标签值,可以采用ES/hbase等
CREATE TABLE `tag_values` (
`id` int NOT NULL AUTO_INCREMENT,
`tag_key` varchar(50) NOT NULL COMMENT '标签唯一标识',
`tag_value` varchar(45) NOT NULL COMMENT '标签值'
`status` int NOT NULL DEFAULT '0' COMMENT '标签状态 0:正常 1:失效 默认0',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE-utf8mb4_0900_ai_ci
INSERT INTO `tag_values` (`tag_key`, `tag_value`) VALUES
('sex', '男'),
('sex', '女'),
('area', '北京市'),
('area', '上海市'),
('view_tv', '天龙八部');
('view_tv', '理想之城');
- SCG表(scg)
CREATE TABLE `scg` (
`id` int NOT NULL AUTO_INCREMENT,
`name` varchar(45) NOT NULL COMMENT `scg名称`,
`status` int NOT NULL DEFAULT '0' COMMENT '状态 0:正常 1:异常 默认0',
`create_time` varchar(45) NOT NULL COMMENT '创建时间',
`update_time` varchar(45) DEFAULT NULL COMMENT '更新时间',
`create_user` varchar(45) DEFAULT NULL COMMENT '创建人',
`update_user` varchar(45) DEFAULT NULL COMMENT '更新人',
`rule` json DEFAULT NULL COMMENT '具体规则 JSON格式',
`entity_ename` varchar(45) NOT NULL COMMENT 'SCG所属实体',
`encircle_type` int DEFAULT '0' COMMENT '圈选类型 0:定投 1:选品',
`update_frency` int DEFAULT '0' COMMENT '更新频率 0:秒级别 1:分钟级 2:天级',
PRIMARY KEY (`id`)
) ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE-utf8mb4_0900_ai_ci
INSERT INTO `scg` (`name`, `status`, `create_time`, `update_time`, `create_user`, `update_user`, `rule`, `entity_ename`, `encircle_type`, `update_frency`) VALUES
('20到35岁的男性用户', 0, '2024-06-11 10:00:00', NULL, '创建用户A', NULL, '{"condition": "age <= 35 && age >= 20"}', 'crowd', 1, 1),
('状态正常并且隐私等级为公开的视频', 0, '2024-06-11 11:00:00', NULL, '创建用户B', NULL, '{}', 'video', 1, 1),
3、通用规则设计 #
规则实体类定义:
- CommonSearchQueryDTO(最终规则形式)
public class CommonSearchQueryDTO {
/**
* 查询规则
*/
private CommonSearchQueryGroupDTO groupDTO;
/**
* 排序规则
*/
private List<CommonSearchSortDTO> searchSortDTOList;
}
- CommonSearchQueryGroupDTO(查询规则)
public class CommonSearchQueryGroupDTO {
/**
* 策略类型 {@link CommonSearchPolicyTypeEnum}
*/
private String policyType;
/**
* 规则单项
*/
private List<CommonSearchQueryItemDTO> itemList = new LinkedList<CommonSearchQueryItemDTO>();
/**
* 规则分组
*/
private List<ComonSearchQueryGroupDTO> groupList = new LinkedList<CommonSearchGroupDTO>();
}
- CommonSearchSortDTO(排序规则)
public class CommonSearchSortDTO {
/**
* 标签唯一标识
*/
private String tagKey;
/**
* 是否升序
*/
private boolean asc;
}
- CommonSearchQueryItemDTO(标签具体信息)
public class CommonSearchQueryItemDTO {
/**
* 标签唯一标识
*/
private String tagKey;
/**
* 操作 {@link CommonSearchOperateEnum}
*/
private String operate;
/**
* 包含条件
*/
private List<String> terms;
/**
* 范围条件
*/
private CommonSearchQueryRangeDTO range;
/**
* 精确或全文检索条件
*/
private String value;
/**
* 扩展存储
*/
private Map<String, String> extraMap;
}
- CommonSearchOperateEnum(操作类型枚举)
public enum CommonSearchOperateEnum {
CONTAIN("包含"),
NOT_CONTAIN("不包含"),
RANGE("在范围内"),
EQUAL("等于"),
NOT_EQUAL("不等于"),
LIKE("模糊匹配"),
EXIST("是否存在");
}
- CommonSearchQueryRangeDTO(选品策略-范围查询枚举值)
public class CommonSearchQueryRangeDTO {
/**
* 大于,和gte只能有一个出现,同时出现gt字短会被忽视
*/
private String gt;
/**
* 大于等于
*/
private String gte;
/**
* 小于,和lte只能有一个出现,同时出现lt字段会被忽视
*/
private String lt;
/**
* 小于等于
*/
private String lte;
}
- CommonSearchPolicyTypeEnum(选品策略-标签之间的关系 与/或)
public enum commonSearchPolicyTypeEnum {
AND ("以下Group内满足所有条件"),
OR ("以下Group内满足任一条件");
}
另外提供一个规则转换类Adb3GenerateManager,用于将规则转化为SQL(也可以转化为ES语法)
import java.util.ArrayList;
import java.util.List;
import com.google.common.collect.Lists; // 假设导入了Google Guava库中的Lists类
import org.apache.commons.lang3.StringUtils; // 假设导入了Apache Commons Lang库中的StringUtils类
public class Adb3GenerateManager {
/**
* 控制哪些标签支持like
*/
public static List<String> likeTagKeyList = Lists.newArrayList("name");
public String generateResultSql(CommonSearchQueryDTO queryDTO, Map<String, TagsDO> tableFieldPropertyMap) {
if (queryDTO == null || tableFieldPropertyMap == null || tableFieldPropertyMap.size() == 0) {
return "";
}
CommonSearchQueryGroupDTO groupDTO = queryDTO.getGroupDTO();
List<CommonSearchSortDTO> searchSortDTOList = queryDTO.getSearchSortDTOList();
String searchSql = generateSearchConditions(groupDTO, tableFieldPropertyMap);
String orderSql = generateOrderBys(searchSortDTOList);
if (orderSql != null && orderSql.length() > 0) {
searchSql += " order by " + orderSql;
}
return searchSql;
}
public String generateOrderBys(List<CommonSearchSortDTO> sorts) {
StringBuilder sortSql = new StringBuilder();
// 在SQL中添加排序信息
if (sorts != null && sorts.size() > 0) {
for (CommonSearchSortDTO sort : sorts) {
sortSql.append(sort.getTagKey())
.append(" ")
.append(sort.isAsc() ? "ASC" : "DESC").append(",");
}
if (sortSql.length() > 0) {
sortSql.deleteCharAt(sortSql.length() - 1);
}
}
return sortSql.toString();
}
public String generateSearchConditions(CommonSearchQueryGroupDTO schema,Map<String, TagsDO> tableFieldPropertyMap) {
StringBuilder conditions = new StringBuilder();
if (tableFieldPropertyMap == null) {
tableFieldPropertyMap = new HashMap();
}
if (schema != null) {
boolean hasConditions = false;
String joinFlag = "AND";
if (CommonSearchPolicyTypeEnum.OR.name().equals(schema.getPolicyType())) {
joinFlag = "OR";
}
conditions.append("( ");
// 将单个的条件组装多个条件,用AND或OR操作符连接起来
boolean notFirst = false;
if (schema.getItemList() != null && schema.getItemList().size() > 0) {
for (CommonSearchQueryItemDTO item : schema.getItemList()) {
TagsDO tagsDO = tableFieldPropertyMap.get(item.getTagKey());
String singleCondition = generateSingleSearchCondition(item, tagsDO);
if (StringUtils.isNotEmpty(singleCondition)) {
if (notFirst) {
conditions.append(" ").append(joinFlag).append(" ");
}
conditions.append(singleCondition);
notFirst = true;
hasConditions = true;
}
}
}
// 如果还有多个Group,递归调用groupList生成其它的条件,不过递归调用时不传递排序和白名单、黑名单数据
if (schema.getGroupList() != null && schema.getGroupList().size() > 0) {
for (CommonSearchQueryGroupDTO group : schema.getGroupList()) {
String groupCondition = generateSearchConditions(group, tableFieldPropertyMap);
if (StringUtils.isNotEmpty(groupCondition)) {
if (notFirst) {
conditions.append(" ").append(joinFlag).append(" ");
}
conditions.append(groupCondition);
notFirst = true;
hasConditions = true;
}
}
}
conditions.append(" )");
// 如果最终发现没有条件,那么要删除conditions中已经初始化的内容
if (false == hasConditions) {
conditions.delete(0, conditions.length());
}
}
return conditions.toString();
}
private static boolean valueIsString(TagsDO tagsDO) {
if (tagsDO == null) {
return false;
}
String type = tagsDO.getTagType();
if ("STRING".equals(type)) {
return true;
}
return false;
}
private static boolean valueIsBoolean(TagsDO tagsDO) {
if (tagsDO == null) {
return false;
}
String type = tagsDO.getTagType();
if ("BOOLEAN".equals(type)) {
return true;
}
return false;
}
private String generateSingleSearchCondition(CommonSearchQueryItemDTO item, TagsDO tagsDO) {
if (item == null || tagsDO == null) {
return "";
}
CommonSearchOperateEnum operateEnum = CommonSearchOperateEnum.index(item.getOperate());
if (operateEnum == null) {
return "";
}
switch (operateEnum) {
case CONTAIN:
case NOT_CONTAIN:
return generateContain(item, tagsDO);
case EQUAL:
case NOT_EQUAL:
return generateEqual(item, tagsDO);
case RANGE:
return generateRange(item, tagsDO);
case LIKE:
return generateLike(item, tagsDO);
case EXIST:
return generateExist(item, tagsDO);
default:
break;
}
return null;
}
private String generateExist(CommonSearchQueryItemDTO item, TagsDO tagsDO) {
StringBuilder sb = new StringBuilder();
String value = item.getValue();
if (BooleanUtils.toBoolean(value)) {
sb.append(item.getTagKey()).append(" IS NOT NULL");
} else {
sb.append(item.getTagKey()).append(" IS NULL");
}
return sb.toString();
}
private String generateLike(CommonSearchQueryItemDTO item, TagsDO tagsDO) {
StringBuilder sb = new StringBuilder();
String value = item.getValue();
sb.append(item.getTagKey()).append(" LIKE '%").append(value).append("%'");
return sb.toString();
}
private String generateRange(CommonSearchQueryItemDTO item, TagsDO tagsDO) {
StringBuilder multiConditions = new StringBuilder();
String joinFlag = "";
if (StringUtils.isNotEmpty(item.getRange().getLt())) {
String value = item.getRange().getLt();
multiConditions.append(joinFlag).append(item.getTagKey()).append(" < ").append(value);
joinFlag = " AND ";
}
if (StringUtils.isNotEmpty(item.getRange().getLte())) {
String value = item.getRange().getLte();
multiConditions.append(joinFlag).append(item.getTagKey()).append(" <= ").append(value);
joinFlag = " AND ";
}
if (StringUtils.isNotEmpty(item.getRange().getGt())) {
String value = item.getRange().getGt();
multiConditions.append(joinFlag).append(item.getTagKey()).append(" > ").append(value);
joinFlag = " AND ";
}
if (StringUtils.isNotEmpty(item.getRange().getGte())) {
String value = item.getRange().getGte();
multiConditions.append(joinFlag).append(item.getTagKey()).append(" >= ").append(value);
}
return multiConditions.toString();
}
private String generateEqual(CommonSearchQueryItemDTO item, TagsDO tagsDO) {
CommonSearchOperateEnum operateEnum = CommonSearchOperateEnum.index(item.getOperate());
String operator = "=";
if (CommonSearchOperateEnum.NOT_EQUAL.equals(operateEnum)) {
operator = "!=";
}
String value = item.getValue();
if (valueIsString(tagsDO)) {
value = "'" + value + "'";
} else if (valueIsBoolean(tagsDO)) {
value = String.valueOf(BooleanUtils.toBoolean(value));
}
return item.getTagKey() + " " + operator + " " + value;
}
private static String generateContain(CommonSearchQueryItemDTO item, TagsDO tagsDO) {
CommonSearchOperateEnum operateEnum = CommonSearchOperateEnum.index(item.getOperate());
if (likeTagKeyList.contains(item.getTagKey())) {
List<String> values = new ArrayList<>();
if (item.getTerms() != null && item.getTerms().size() > 0) {
values.addAll(item.getTerms());
} else if (StringUtils.isNotBlank(item.getValue())) {
String trimValue = item.getValue();
String[] valueStrings = trimValue.split(",");
for (String value : valueStrings) {
if (StringUtils.isNotBlank(value)) {
values.add(value.trim());
}
}
}
if (values != null && values.size() > 0) {
StringBuilder sb = new StringBuilder();
sb.append("(");
int index = 0;
for (String value : values) {
if (StringUtils.isBlank(value.trim())) {
continue;
}
if (index > 0) {
sb.append(" OR ");
}
index++;
String tempValue = value;
sb.append(item.getTagKey()).append(" like '%").append(tempValue).append("%'");
}
sb.append(")");
if (CommonSearchOperateEnum.NOT_CONTAIN.equals(operateEnum)) {
return " not " + sb.toString();
}
return sb.toString();
}
return "";
}
// 包含还是不包含应该取terms属性的值,但是如果没有,那就取value,把value当做英文逗号分隔的多个值.
StringBuilder values = new StringBuilder();
if (item.getTerms() != null && item.getTerms().size() > 0) {
if (valueIsString(tagsDO)) {
item.getTerms().forEach(value -> {
String tempValue = value;
if (StringUtils.isNotBlank(tempValue)) {
values.append("'").append(tempValue).append("',");
}
});
} else {
item.getTerms().forEach(value -> {
String tempValue = value;
if (StringUtils.isNotBlank(tempValue)) {
values.append(tempValue).append(",");
}
});
}
} else if (StringUtils.isNotBlank(item.getValue())) {
String trimValue = item.getValue();
String[] valueStrings = trimValue.split(",");
if (valueStrings != null && valueStrings.length > 0) {
if (valueIsString(tagsDO)) {
for (String value : valueStrings) {
String tempValue = value;
if (StringUtils.isNotBlank(tempValue)) {
values.append("'").append(tempValue).append("',");
}
}
} else {
for (String value : valueStrings) {
String tempValue = value;
if (StringUtils.isNotBlank(tempValue)) {
values.append(tempValue).append(",");
}
}
}
}
}
if (values.length() > 0) {
values.deleteCharAt(values.length() - 1);
}
if (tagsDO.getIsMultiple() != null && tagsDO.getIsMultiple() == 1) {
// ADB存的是多值使用多值语法
if (CommonSearchOperateEnum.NOT_CONTAIN.equals(operateEnum)) {
return "( ref(" + item.getTagKey() + ", 0) not in (" + values + ") or " + item.getTagKey() + " is null )";
}
// 使用ADB3的多值查询语法查询
return "ref(" + item.getTagKey() + ", 0) in (" + values + ") ";
} else {
// ADB存的是单值使用in语法
if (CommonSearchOperateEnum.NOT_CONTAIN.equals(operateEnum)) {
return "(" + item.getTagKey() + " not in (" + values + ") or " + item.getTagKey() + " is null )";
}
return item.getTagKey() + " in (" + values + ") ";
}
}
}
下面是两个例子:
demo1: 性别=男 && 年龄在20-35之间 && (地域=北京||地域=上海) 的用户,返回结果按照id升序排列,年龄降序
{
"groupDTO": {
"policyType": "AND",
"itemList": [
{
"tagKey": "sex",
"operate": "EQUAL",
"terms": null,
"range": null,
"value": "男性",
"extraMap": null
},
{
"tagKey": "age",
"operate": null,
"terms": null,
"range": {
"gt": null,
"gte": "20",
"lt": null,
"lte": "35"
},
"value": null,
"extraMap": null
},
{
"tagKey": "area",
"operate": "CONTAIN",
"terms": ["北京", "上海"],
"range": null,
"value": null,
"extraMap": null
}
],
"groupList": null
},
"searchSortDTOList": [
{
"tagkey": "id",
"asc": true
},
{
"tagkey": "age",
"asc": false
}
]
}
经过Adb3GenerateManager转换成SQL如下:
(sex = '男性' AND ref(area, 0) in ('北京', '上海')) order by id ASC, age DESC
4、系统架构图 #