Skip to main content

从零开始搭建选品/选客系统

·1772 words·9 mins
WFUing
Author
WFUing
A graduate who loves coding.
Table of Contents

一、基本概念
#

  • 选品、选客:从庞大的商品库(视频/节目/用户数据)中按照特定的规则筛选出需要的商品(视频/节目/用户数据等)的过程
  • 实体:圈选的主题,比如商品、节目、视频、用户等,不同的实体对应不同类型的选品,比如商品选品、节目选品等
  • 标签:选品等最小单位,比如:性别、年龄、地域、观看时长等
  • 规则:标签的各种组合方式,比如:性别=男 && 年龄在20到35之间 && (地域=北京||地域=上海)
  • SCG(standard content group):标准内容组

二、效果图
#

三、具体设计
#

1、中间件介绍
#

  1. mysql:略
  2. es: Elasticsearch 底层数据结构
  3. Greenplum
  4. AnalyticDB
  5. redis
  6. HBase

2、库表设计
#

主要涉及五张表:实体表、类目表、标签表、标签值表、SCG表

  1. 实体表(entity)
CREATE TEBALE `entity` (
    `id` int NOT NULL AUTO_INCREMENT,
    `name` varchar(45) NOT NULL COMMENT '实体名称',
    `ename` varchar(50) NOT NULL COMMENT '实体唯一标识',
    `status` int NOT NULL DEFAULT '0' COMMENT '实体状态 0:正常 1:异常 默认0',
    `create_time` varchar(45) DEFAULT NOT NULL COMMENT '创建时间',
    `create_user` varchar(45) DEFAULT NOT NULL COMMENT '创建人',
    `update_user` varchar(45) DEFAULT NOT NULL COMMENT '更新人',
    PRIMARY KEY(`id`),
    UNIQUE KEY `ename_UNIQUE` (`ename`)
)   ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE-utf8mb4_0900_ai_ci

INSERT INTO `entity` (`name`, `ename`, `status`, `create_time`, `create_user`, `update_user`)
VALUES
('实体1', 'entity1', 0, '2024-06-10 12:00:00', 'creator1', 'updater1'),
('实体2', 'entity2', 0, '2024-06-10 13:00:00', 'creator2', 'updater2'),
('实体3', 'entity3', 1, '2024-06-10 14:00:00', 'creator3', 'updater3');
  1. 类目表(category)
CREATE TABLE `category` (
    `id` int NOT NULL AUTO_INCREMENT,
    `name` varchar(45) NOT NULL COMMENT '类目名称',
    `status` int DEFAULT '0' COMMENT '类目状态 0:正常 1:异常',
    `level` int DEFAULT '1' COMMENT '类目级别',
    `parent_id` int DEFAULT '0' COMMENT '父类目ID 0代表顶级类目'
    PRIMATY KEY(`id`)
)   ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE-utf8mb4_0900_ai_ci

INSERT INTO `category` (`name`, `level`, `parent_id`)
VALUES
('基础属性', 1, 0),
('视频属性', 1, 0),
('基本特征', 2, 1),
('地域属性', 2, 1),
('近30天', 2, 2),
('近90天', 2, 2);
  1. 标签表(tags)
CREATE TABLE `tags` (
    `id` int NOT NULL AUTO_INCREMENT,
    `name` varchar(45) NOT NULL COMMENT '标签名称',
    `tag_key` varchar(50) NOT NULL COMMENT '标签唯一标识',
    `status` int NOT NULL DEFAULT '0' COMMENT '标签状态 0:正常 1:失效 默认0',
    `create_time` varchar(45) NOT NULL COMMENT '创建时间'
    `update_time` varchar(45) DEFAULT NULL COMMENT '更新时间'
    `tag_type` varchar(45) DEFAULT 'STRING'COMMENT '标签类型 STRING NUMBER BOOLEAN DATE',
    `is_multiple` int DEFAULT '0' COMMENT '是否支持多值 0:单值 1:单值 默认0'
    `category_id` int DEFAULT '0' COMMENT '类目ID'
    `control_type` varchar(45) DEFAULT NULL COMMENT '控件类型 INPUT SELECTIVE DATE NUMBE_RANGE DATE_RANGE',
    PRIMARY KEY(`id`),
    UNIQUE KEY `ename_UNIQUE` (`tag_key`)
)   ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE-utf8mb4_0900_ai_ci

INSERT INTO `tags` (`name`, `tag_key`, `status`, `create_time`, `update_time`, `tag_type`, `is_multiple`, `category_id`, `control_type`) VALUES
('性别', 'sex', 0, '2024-06-11 10:00:00', NULL, 'STRING', 0, 6, 'SELECTIVE'),
('姓名', 'name', 0, '2024-06-11 10:05:00', NULL, 'STRING', 1, 0, 'INPUT'),
('所在城市', 'area', 0, '2024-06-11 10:10:00', NULL, 'STRING', 0, 1, 'SELECTIVE'),
('看过的剧', 'view_tv', 0, '2024-06-11 10:15:00', NULL, 'STRING', 1, 5, 'SELECTIVE');
  1. 标签值表(tag_values)

真实业务不会采用mysql存放标签值,可以采用ES/hbase等

CREATE TABLE `tag_values` (
    `id` int NOT NULL AUTO_INCREMENT,
    `tag_key` varchar(50) NOT NULL COMMENT '标签唯一标识',
    `tag_value` varchar(45) NOT NULL COMMENT '标签值'
    `status` int NOT NULL DEFAULT '0' COMMENT '标签状态 0:正常 1:失效 默认0',
    PRIMARY KEY (`id`)
)   ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE-utf8mb4_0900_ai_ci

INSERT INTO `tag_values` (`tag_key`, `tag_value`) VALUES
('sex', '男'),
('sex', '女'),
('area', '北京市'),
('area', '上海市'),
('view_tv', '天龙八部');
('view_tv', '理想之城');
  1. SCG表(scg)
CREATE TABLE `scg` (
    `id` int NOT NULL AUTO_INCREMENT,
    `name` varchar(45) NOT NULL COMMENT `scg名称`,
    `status` int NOT NULL DEFAULT '0' COMMENT '状态 0:正常 1:异常 默认0',
    `create_time` varchar(45) NOT NULL COMMENT '创建时间',
    `update_time` varchar(45) DEFAULT NULL COMMENT '更新时间',
    `create_user` varchar(45) DEFAULT NULL COMMENT '创建人',
    `update_user` varchar(45) DEFAULT NULL COMMENT '更新人',
    `rule` json DEFAULT NULL COMMENT '具体规则 JSON格式',
    `entity_ename` varchar(45) NOT NULL COMMENT 'SCG所属实体',
    `encircle_type` int DEFAULT '0' COMMENT '圈选类型 0:定投 1:选品',
    `update_frency` int DEFAULT '0' COMMENT '更新频率 0:秒级别 1:分钟级 2:天级',
    PRIMARY KEY (`id`)
)   ENGINE=InnoDB DEFAULT CHARSET=utf8mb4 COLLATE-utf8mb4_0900_ai_ci

INSERT INTO `scg` (`name`, `status`, `create_time`, `update_time`, `create_user`, `update_user`, `rule`, `entity_ename`, `encircle_type`, `update_frency`) VALUES
('20到35岁的男性用户', 0, '2024-06-11 10:00:00', NULL, '创建用户A', NULL, '{"condition": "age <= 35 && age >= 20"}', 'crowd', 1, 1),
('状态正常并且隐私等级为公开的视频', 0, '2024-06-11 11:00:00', NULL, '创建用户B', NULL, '{}', 'video', 1, 1),

3、通用规则设计
#

规则实体类定义:

  • CommonSearchQueryDTO(最终规则形式)
public class CommonSearchQueryDTO {
    /**
     * 查询规则
    */
   private CommonSearchQueryGroupDTO groupDTO;

   /**
    * 排序规则
   */
    private List<CommonSearchSortDTO> searchSortDTOList;
}
  • CommonSearchQueryGroupDTO(查询规则)
public class CommonSearchQueryGroupDTO {
    /**
     * 策略类型 {@link CommonSearchPolicyTypeEnum}
    */
    private String policyType;

    /**
     * 规则单项
    */
    private List<CommonSearchQueryItemDTO> itemList = new LinkedList<CommonSearchQueryItemDTO>();

    /**
     * 规则分组
     */
    private List<ComonSearchQueryGroupDTO> groupList = new LinkedList<CommonSearchGroupDTO>();
}
  • CommonSearchSortDTO(排序规则)
public class CommonSearchSortDTO {
    /**
     * 标签唯一标识
    */
    private String tagKey;

    /**
     * 是否升序
    */
    private boolean asc;
}
  • CommonSearchQueryItemDTO(标签具体信息)
public class CommonSearchQueryItemDTO {
    /**
     * 标签唯一标识
    */
    private String tagKey;

    /**
     * 操作 {@link CommonSearchOperateEnum}
    */
    private String operate;

    /**
     * 包含条件
    */
    private List<String> terms;

    /**
     * 范围条件
    */
    private CommonSearchQueryRangeDTO range;

    /**
     * 精确或全文检索条件
    */
    private String value;

    /**
     * 扩展存储
    */
    private Map<String, String> extraMap;
}
  • CommonSearchOperateEnum(操作类型枚举)
public enum CommonSearchOperateEnum {
    CONTAIN("包含"),
    NOT_CONTAIN("不包含"),
    RANGE("在范围内"),
    EQUAL("等于"),
    NOT_EQUAL("不等于"),
    LIKE("模糊匹配"),
    EXIST("是否存在");
}
  • CommonSearchQueryRangeDTO(选品策略-范围查询枚举值)
public class CommonSearchQueryRangeDTO {
    /**
     * 大于,和gte只能有一个出现,同时出现gt字短会被忽视
    */
    private String gt;

    /**
     * 大于等于
    */
    private String gte;

    /**
     * 小于,和lte只能有一个出现,同时出现lt字段会被忽视
    */
    private String lt;

    /**
     * 小于等于
    */
    private String lte;
}
  • CommonSearchPolicyTypeEnum(选品策略-标签之间的关系 与/或)
public enum commonSearchPolicyTypeEnum {
    AND ("以下Group内满足所有条件"),
    OR ("以下Group内满足任一条件");
}

另外提供一个规则转换类Adb3GenerateManager,用于将规则转化为SQL(也可以转化为ES语法)

import java.util.ArrayList;
import java.util.List;
import com.google.common.collect.Lists; // 假设导入了Google Guava库中的Lists类
import org.apache.commons.lang3.StringUtils; // 假设导入了Apache Commons Lang库中的StringUtils类

public class Adb3GenerateManager {
    /**
     * 控制哪些标签支持like
     */
    public static List<String> likeTagKeyList = Lists.newArrayList("name");
    public String generateResultSql(CommonSearchQueryDTO queryDTO, Map<String, TagsDO> tableFieldPropertyMap) {
        if (queryDTO == null || tableFieldPropertyMap == null || tableFieldPropertyMap.size() == 0) {
            return "";
        }
        CommonSearchQueryGroupDTO groupDTO = queryDTO.getGroupDTO();
        List<CommonSearchSortDTO> searchSortDTOList = queryDTO.getSearchSortDTOList();
        String searchSql = generateSearchConditions(groupDTO, tableFieldPropertyMap);
        String orderSql = generateOrderBys(searchSortDTOList);
        if (orderSql != null && orderSql.length() > 0) {
            searchSql += " order by " + orderSql;
        }
        return searchSql;
    }
    public String generateOrderBys(List<CommonSearchSortDTO> sorts) {
        StringBuilder sortSql = new StringBuilder();
        // 在SQL中添加排序信息
        if (sorts != null && sorts.size() > 0) {
            for (CommonSearchSortDTO sort : sorts) {
                sortSql.append(sort.getTagKey())
                        .append(" ")
                        .append(sort.isAsc() ? "ASC" : "DESC").append(",");
            }
            if (sortSql.length() > 0) {
                sortSql.deleteCharAt(sortSql.length() - 1);
            }
        }
        return sortSql.toString();
    }
    public String generateSearchConditions(CommonSearchQueryGroupDTO schema,Map<String, TagsDO> tableFieldPropertyMap) {
        StringBuilder conditions = new StringBuilder();
        if (tableFieldPropertyMap == null) {
            tableFieldPropertyMap = new HashMap();
        }
        if (schema != null) {
            boolean hasConditions = false;
            String joinFlag = "AND";
            if (CommonSearchPolicyTypeEnum.OR.name().equals(schema.getPolicyType())) {
                joinFlag = "OR";
            }
            conditions.append("( ");
            // 将单个的条件组装多个条件,用AND或OR操作符连接起来
            boolean notFirst = false;
            if (schema.getItemList() != null && schema.getItemList().size() > 0) {
                for (CommonSearchQueryItemDTO item : schema.getItemList()) {
                    TagsDO tagsDO = tableFieldPropertyMap.get(item.getTagKey());
                    String singleCondition = generateSingleSearchCondition(item, tagsDO);
                    if (StringUtils.isNotEmpty(singleCondition)) {
                        if (notFirst) {
                            conditions.append(" ").append(joinFlag).append(" ");
                        }
                        conditions.append(singleCondition);
                        notFirst = true;
                        hasConditions = true;
                    }
                }
            }
            // 如果还有多个Group,递归调用groupList生成其它的条件,不过递归调用时不传递排序和白名单、黑名单数据
            if (schema.getGroupList() != null && schema.getGroupList().size() > 0) {
                for (CommonSearchQueryGroupDTO group : schema.getGroupList()) {
                    String groupCondition = generateSearchConditions(group, tableFieldPropertyMap);
                    if (StringUtils.isNotEmpty(groupCondition)) {
                        if (notFirst) {
                            conditions.append(" ").append(joinFlag).append(" ");
                        }
                        conditions.append(groupCondition);
                        notFirst = true;
                        hasConditions = true;
                    }
                }
            }
            conditions.append(" )");
            // 如果最终发现没有条件,那么要删除conditions中已经初始化的内容
            if (false == hasConditions) {
                conditions.delete(0, conditions.length());
            }
        }
        return conditions.toString();
    }

    private static boolean valueIsString(TagsDO tagsDO) {
        if (tagsDO == null) {
            return false;
        }
        String type = tagsDO.getTagType();
        if ("STRING".equals(type)) {
            return true;
        }
        return false;
    }
    private static boolean valueIsBoolean(TagsDO tagsDO) {
        if (tagsDO == null) {
            return false;
        }
        String type = tagsDO.getTagType();
        if ("BOOLEAN".equals(type)) {
            return true;
        }
        return false;
    }
    private String generateSingleSearchCondition(CommonSearchQueryItemDTO item, TagsDO tagsDO) {
        if (item == null || tagsDO == null) {
            return "";
        }
        CommonSearchOperateEnum operateEnum = CommonSearchOperateEnum.index(item.getOperate());
        if (operateEnum == null) {
            return "";
        }
        switch (operateEnum) {
            case CONTAIN:
            case NOT_CONTAIN:
                return generateContain(item, tagsDO);
            case EQUAL:
            case NOT_EQUAL:
                return generateEqual(item, tagsDO);
            case RANGE:
                return generateRange(item, tagsDO);
            case LIKE:
                return generateLike(item, tagsDO);
            case EXIST:
                return generateExist(item, tagsDO);
            default:
                break;
        }
        return null;
    }
    private String generateExist(CommonSearchQueryItemDTO item, TagsDO tagsDO) {
        StringBuilder sb = new StringBuilder();
        String value = item.getValue();
        if (BooleanUtils.toBoolean(value)) {
            sb.append(item.getTagKey()).append(" IS NOT NULL");
        } else {
            sb.append(item.getTagKey()).append(" IS NULL");
        }
        return sb.toString();
    }
    private String generateLike(CommonSearchQueryItemDTO item, TagsDO tagsDO) {
        StringBuilder sb = new StringBuilder();
        String value = item.getValue();
        sb.append(item.getTagKey()).append(" LIKE '%").append(value).append("%'");
        return sb.toString();
    }
    private String generateRange(CommonSearchQueryItemDTO item, TagsDO tagsDO) {
        StringBuilder multiConditions = new StringBuilder();
        String joinFlag = "";
        if (StringUtils.isNotEmpty(item.getRange().getLt())) {
            String value = item.getRange().getLt();
            multiConditions.append(joinFlag).append(item.getTagKey()).append(" < ").append(value);
            joinFlag = " AND ";
        }
        if (StringUtils.isNotEmpty(item.getRange().getLte())) {
            String value = item.getRange().getLte();
            multiConditions.append(joinFlag).append(item.getTagKey()).append(" <= ").append(value);
            joinFlag = " AND ";
        }
        if (StringUtils.isNotEmpty(item.getRange().getGt())) {
            String value = item.getRange().getGt();
            multiConditions.append(joinFlag).append(item.getTagKey()).append(" > ").append(value);
            joinFlag = " AND ";
        }
        if (StringUtils.isNotEmpty(item.getRange().getGte())) {
            String value = item.getRange().getGte();
            multiConditions.append(joinFlag).append(item.getTagKey()).append(" >= ").append(value);
        }
        return multiConditions.toString();
    }
    private String generateEqual(CommonSearchQueryItemDTO item, TagsDO tagsDO) {
        CommonSearchOperateEnum operateEnum = CommonSearchOperateEnum.index(item.getOperate());
        String operator = "=";
        if (CommonSearchOperateEnum.NOT_EQUAL.equals(operateEnum)) {
            operator = "!=";
        }
        String value = item.getValue();
        if (valueIsString(tagsDO)) {
            value = "'" + value + "'";
        } else if (valueIsBoolean(tagsDO)) {
            value = String.valueOf(BooleanUtils.toBoolean(value));
        }
        return item.getTagKey() + " " + operator + " " + value;
    }

    private static String generateContain(CommonSearchQueryItemDTO item, TagsDO tagsDO) {
        CommonSearchOperateEnum operateEnum = CommonSearchOperateEnum.index(item.getOperate());
        if (likeTagKeyList.contains(item.getTagKey())) {
            List<String> values = new ArrayList<>();
            if (item.getTerms() != null && item.getTerms().size() > 0) {
                values.addAll(item.getTerms());
            } else if (StringUtils.isNotBlank(item.getValue())) {
                String trimValue = item.getValue();
                String[] valueStrings = trimValue.split(",");
                for (String value : valueStrings) {
                    if (StringUtils.isNotBlank(value)) {
                        values.add(value.trim());
                    }
                }
            }
            if (values != null && values.size() > 0) {
                StringBuilder sb = new StringBuilder();
                sb.append("(");
                int index = 0;
                for (String value : values) {
                    if (StringUtils.isBlank(value.trim())) {
                        continue;
                    }
                    if (index > 0) {
                        sb.append(" OR ");
                    }
                    index++;
                    String tempValue = value;
                    sb.append(item.getTagKey()).append(" like '%").append(tempValue).append("%'");
                }
                sb.append(")");
                if (CommonSearchOperateEnum.NOT_CONTAIN.equals(operateEnum)) {
                    return " not " + sb.toString();
                }
                return sb.toString();
            }
            return "";
        }
        // 包含还是不包含应该取terms属性的值,但是如果没有,那就取value,把value当做英文逗号分隔的多个值.
        StringBuilder values = new StringBuilder();
        if (item.getTerms() != null && item.getTerms().size() > 0) {
            if (valueIsString(tagsDO)) {
                item.getTerms().forEach(value -> {
                    String tempValue = value;
                    if (StringUtils.isNotBlank(tempValue)) {
                        values.append("'").append(tempValue).append("',");
                    }
                });
            } else {
                item.getTerms().forEach(value -> {
                    String tempValue = value;
                    if (StringUtils.isNotBlank(tempValue)) {
                        values.append(tempValue).append(",");
                    }
                });
            }
        } else if (StringUtils.isNotBlank(item.getValue())) {
            String trimValue = item.getValue();
            String[] valueStrings = trimValue.split(",");
            if (valueStrings != null && valueStrings.length > 0) {
                if (valueIsString(tagsDO)) {
                    for (String value : valueStrings) {
                        String tempValue = value;
                        if (StringUtils.isNotBlank(tempValue)) {
                            values.append("'").append(tempValue).append("',");
                        }
                    }
                } else {
                    for (String value : valueStrings) {
                        String tempValue = value;
                        if (StringUtils.isNotBlank(tempValue)) {
                            values.append(tempValue).append(",");
                        }
                    }
                }
            }
        }
        if (values.length() > 0) {
            values.deleteCharAt(values.length() - 1);
        }
        if (tagsDO.getIsMultiple() != null && tagsDO.getIsMultiple() == 1) {
            // ADB存的是多值使用多值语法
            if (CommonSearchOperateEnum.NOT_CONTAIN.equals(operateEnum)) {
                return "( ref(" + item.getTagKey() + ", 0) not in (" + values + ") or " + item.getTagKey() + " is null )";
            }
            // 使用ADB3的多值查询语法查询
            return "ref(" + item.getTagKey() + ", 0) in (" + values + ") ";
        } else {
            // ADB存的是单值使用in语法
            if (CommonSearchOperateEnum.NOT_CONTAIN.equals(operateEnum)) {
                return "(" + item.getTagKey() + " not in (" + values + ") or " + item.getTagKey() + " is null )";
            }
            return item.getTagKey() + " in (" + values + ") ";
        }
    }
}

下面是两个例子:

demo1: 性别=男 && 年龄在20-35之间 && (地域=北京||地域=上海) 的用户,返回结果按照id升序排列,年龄降序

{
  "groupDTO": {
    "policyType": "AND",
    "itemList": [
      {
        "tagKey": "sex",
        "operate": "EQUAL",
        "terms": null,
        "range": null,
        "value": "男性",
        "extraMap": null
      },
      {
        "tagKey": "age",
        "operate": null,
        "terms": null,
        "range": {
          "gt": null,
          "gte": "20",
          "lt": null,
          "lte": "35"
        },
        "value": null,
        "extraMap": null
      },
      {
        "tagKey": "area",
        "operate": "CONTAIN",
        "terms": ["北京", "上海"],
        "range": null,
        "value": null,
        "extraMap": null
      }
    ],
    "groupList": null
  },
  "searchSortDTOList": [
    {
      "tagkey": "id",
      "asc": true
    },
    {
      "tagkey": "age",
      "asc": false
    }
  ]
}

经过Adb3GenerateManager转换成SQL如下:

(sex = '男性' AND ref(area, 0) in ('北京', '上海')) order by id ASC, age DESC

4、系统架构图
#


💬评论