BI 报表查询优化笔记(基于雪花算法 ID)

1. 背景问题

报表查询的常见痛点

  • 时间范围查询慢:WHERE create_time BETWEEN '2024-01-01' AND '2024-01-31'
  • 深度分页性能差:LIMIT 100000, 20
  • 复杂聚合查询耗时:多维度统计和GROUP BY
  • 索引选择不优:时间字段索引效率不如主键索引

2. 雪花算法ID优化方案

2.1 雪花算法ID的特性

  • 时间递增性:包含时间戳信息,相近时间的ID数值相近
  • 范围查询友好:可以通过ID范围快速定位特定时间段的数据
  • 主键索引优势:基于主键索引,查询效率更高

2.2 时间范围转换为ID范围

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/**
 * Converts a wall-clock time to the smallest snowflake ID that could have been
 * minted at that millisecond (worker/sequence bits zeroed), so callers can use
 * the result as a primary-key range bound.
 *
 * @param dateTime local time, interpreted as UTC+8
 *                 (NOTE(review): assumes the ID generator also ran in UTC+8 — confirm)
 * @return the estimated ID lower bound for {@code dateTime}
 * @throws IllegalArgumentException if {@code dateTime} precedes the snowflake epoch,
 *         which would yield a negative "ID" and silently break range scans
 */
private long timeToSnowflakeId(LocalDateTime dateTime) {
    long timestamp = dateTime.toInstant(ZoneOffset.ofHours(8)).toEpochMilli();
    long epoch = 1288834974657L; // Twitter snowflake epoch: 2010-11-04T01:42:54.657Z
    long snowflakeTimestamp = timestamp - epoch;
    if (snowflakeTimestamp < 0) {
        throw new IllegalArgumentException("dateTime precedes snowflake epoch: " + dateTime);
    }
    // Timestamp occupies the bits above the 22 worker+sequence bits.
    return snowflakeTimestamp << 22;
}

/**
 * Builds the monthly order report via a primary-key (snowflake ID) range scan
 * instead of a secondary-index scan on create_time.
 *
 * @param year  calendar year of the report
 * @param month calendar month (1-12)
 * @return aggregated report for the whole month
 */
public OrderMonthlyReport getMonthlyReport(int year, int month) {
    LocalDateTime startTime = LocalDateTime.of(year, month, 1, 0, 0, 0);
    LocalDateTime nextMonthStart = startTime.plusMonths(1);

    // BUG FIX: the previous upper bound (plusMonths(1).minusSeconds(1)) cut the
    // range off at 23:59:59.000, dropping every order created in the final
    // 999 ms of the month. Use "first possible ID of next month minus one" as
    // the inclusive upper bound so the whole month is covered exactly.
    long startId = timeToSnowflakeId(startTime);
    long endId = timeToSnowflakeId(nextMonthStart) - 1;

    // Primary-key range query (faster than filtering on a create_time index).
    // NOTE(review): assumes selectByIdRange treats both bounds as inclusive — confirm in mapper XML.
    List<OrderInfo> orders = orderInfoMapper.selectByIdRange(startId, endId);

    return aggregateOrderData(orders);
}

3. 数据聚合策略

3.1 分层聚合数据表

1
2
3
- 日报表(daily_report):保留最近3个月
- 月报表(monthly_report):保留最近2年
- 年报表(yearly_report):永久保留

3.2 定时聚合任务

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
@Component
public class ReportAggregator {

    /**
     * Aggregates yesterday's orders into a daily_report row.
     * Runs nightly at 01:00; idempotent — exits early if the row already exists.
     * NOTE(review): the mapper fields and timeToSnowflakeId are assumed to be
     * injected/defined elsewhere in this class (elided in these notes).
     */
    @Scheduled(cron = "0 0 1 * * ?") // every day at 01:00
    public void aggregateDailyReport() {
        LocalDate yesterday = LocalDate.now().minusDays(1);

        // Idempotency guard: a re-run (or overlapping scheduler fire) must not
        // produce duplicate report rows.
        DailyReport existingReport = dailyReportMapper.selectByDate(yesterday);
        if (existingReport != null) {
            return;
        }

        // BUG FIX: using next midnight's boundary ID itself as an inclusive
        // upper bound would also match an order minted at exactly 00:00:00.000
        // of the following day (worker/sequence bits zero), counting it in two
        // daily reports. Subtract one to keep the range exclusive of that ID.
        long startId = timeToSnowflakeId(yesterday.atStartOfDay());
        long endId = timeToSnowflakeId(yesterday.plusDays(1).atStartOfDay()) - 1;

        List<OrderInfo> orders = orderInfoMapper.selectByIdRange(startId, endId);
        DailyReport report = aggregateAndSave(orders, yesterday);

        dailyReportMapper.insert(report);
    }
}

3.3 数据保留策略

1
2
3
4
5
6
7
8
9
10
/**
 * Nightly retention sweep over the aggregate tables.
 * Daily reports are kept for 90 days, monthly reports for 24 months;
 * yearly reports are never deleted here.
 */
@Scheduled(cron = "0 0 2 * * ?") // every day at 02:00
public void cleanupExpiredData() {
    LocalDate today = LocalDate.now();

    // Drop daily rows past their 90-day window.
    dailyReportMapper.deleteByDateBefore(today.minusDays(90));

    // Drop monthly rows past their 24-month window.
    monthlyReportMapper.deleteByDateBefore(today.minusMonths(24));
}

4. 智能查询策略

4.1 多级缓存查询

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/**
 * Multi-level report lookup: Redis cache -> pre-aggregated table -> realtime
 * computation. Aggregate-table hits are cached for 1 hour; realtime results
 * are cached for only 10 minutes since they may still be changing.
 */
public OrderReport getOrderReport(LocalDate startDate, LocalDate endDate) {
    final String cacheKey = buildCacheKey(startDate, endDate);

    // Level 1: Redis.
    Object cached = redisTemplate.opsForValue().get(cacheKey);
    if (cached != null) {
        return (OrderReport) cached;
    }

    // Level 2: pre-aggregated table.
    OrderReport fromAggregate = getReportFromAggregateTable(startDate, endDate);
    if (fromAggregate != null) {
        redisTemplate.opsForValue().set(cacheKey, fromAggregate, Duration.ofHours(1));
        return fromAggregate;
    }

    // Level 3: compute on the fly — the most expensive path, cached briefly.
    OrderReport computed = calculateRealTimeReport(startDate, endDate);
    redisTemplate.opsForValue().set(cacheKey, computed, Duration.ofMinutes(10));
    return computed;
}

4.2 混合查询策略

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/**
 * Smart lookup: serve entirely from the pre-aggregated daily table when every
 * day in [startDate, endDate] has a row, otherwise fall back to a hybrid path
 * that computes just the missing days in real time.
 */
public OrderReport getOrderReport(LocalDate startDate, LocalDate endDate) {
    // Pull whatever the aggregation job has already produced for the window.
    List<DailyReport> dailyReports = dailyReportMapper.selectByDateRange(startDate, endDate);

    Set<LocalDate> coveredDates = dailyReports.stream()
            .map(DailyReport::getReportDate)
            .collect(Collectors.toSet());

    // Completeness check: collect the days with no aggregate row.
    List<LocalDate> gaps = new ArrayList<>();
    for (LocalDate day : getAllDatesBetween(startDate, endDate)) {
        if (!coveredDates.contains(day)) {
            gaps.add(day);
        }
    }

    return gaps.isEmpty()
            ? aggregateFromDailyReports(dailyReports)   // fully covered — cheap path
            : supplementMissingData(dailyReports, gaps); // fill gaps with realtime data
}

5. 大数据量处理优化

5.1 分段查询

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/**
 * Builds per-day reports for a large date range one day at a time, so at most
 * one day's orders are ever resident in memory.
 *
 * @param startDate first day (inclusive)
 * @param endDate   last day (inclusive)
 * @return one report per day in chronological order
 */
public List<OrderDailyReport> getDailyReportBySegment(LocalDate startDate, LocalDate endDate) {
    List<OrderDailyReport> reports = new ArrayList<>();

    LocalDate currentDate = startDate;
    while (!currentDate.isAfter(endDate)) {
        // BUG FIX: the old endId (next midnight's boundary ID) was also the
        // next day's startId, so an order minted at exactly midnight would be
        // counted in both adjacent days. Subtract one to make the day ranges
        // disjoint.
        long startId = timeToSnowflakeId(currentDate.atStartOfDay());
        long endId = timeToSnowflakeId(currentDate.plusDays(1).atStartOfDay()) - 1;

        List<OrderInfo> dayOrders = orderInfoMapper.selectByIdRange(startId, endId);
        reports.add(aggregateDayData(dayOrders, currentDate));

        currentDate = currentDate.plusDays(1);
    }

    return reports;
}

5.2 流式处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/**
 * Streams a very large report as a sequence of bounded ID-range batches so the
 * full result set is never held in memory at once.
 *
 * @param request carries the report's start/end times
 */
public void processLargeReport(ReportRequest request) {
    long startId = timeToSnowflakeId(request.getStartTime());
    long endId = timeToSnowflakeId(request.getEndTime());

    // BUG FIX: the old code stepped through raw ID space in increments of
    // 10_000. Snowflake IDs keep their 22 low bits for worker/sequence, so
    // 10_000 raw IDs span only ~2.4 microseconds of wall time — for a one-day
    // range the loop would run tens of billions of iterations. Step by a fixed
    // time window expressed in ID units instead: one minute of wall time.
    // NOTE(review): if a single minute can hold too many orders, cap rows with
    // a LIMIT inside selectByIdRange as well.
    long batchSpan = 60_000L << 22; // 60 s in ms, shifted into the timestamp bits

    long currentId = startId;
    while (currentId < endId) {
        long batchEnd = Math.min(currentId + batchSpan - 1, endId);

        // Bounded primary-key range fetch.
        List<OrderInfo> batch = orderInfoMapper.selectByIdRange(currentId, batchEnd);

        processReportBatch(batch);

        currentId = batchEnd + 1;
    }
}

6. 异步处理方案

6.1 大报表异步处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
@RestController
@RequestMapping("/report")
public class ReportController {

// Entry point for report queries: ranges over 30 days are handed off to an
// async task; shorter ranges are computed synchronously in-request.
@GetMapping("/orders")
public Result<OrderReport> getOrderReport(
@RequestParam LocalDate startDate,
@RequestParam LocalDate endDate) {

// Ranges longer than 30 days are too slow to compute inline.
if (ChronoUnit.DAYS.between(startDate, endDate) > 30) {
return processLargeReportAsync(startDate, endDate);
}

// Small range: compute synchronously via the snowflake-ID range path.
OrderReport report = reportService.getOrderReportByIdRange(startDate, endDate);
return Result.success(report);
}

// Registers an async task and immediately returns its ID so the client can poll.
// NOTE(review): Result.success(...) is given a String here while the declared
// return type is Result<OrderReport> — this only compiles if Result.success
// accepts an arbitrary message type; confirm against the Result API, otherwise
// a dedicated "accepted" response type is needed.
private Result<OrderReport> processLargeReportAsync(LocalDate startDate, LocalDate endDate) {
String taskId = UUID.randomUUID().toString();
reportService.submitAsyncReportTask(taskId, startDate, endDate);

return Result.success("报表生成中,任务ID: " + taskId);
}
}

6.2 异步任务实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
@Service
public class ReportService {

    @Autowired
    private ThreadPoolTaskExecutor asyncExecutor;

    @Autowired
    private RedisTemplate<String, Object> redisTemplate;

    // NOTE(review): `log` and `alertService` are used below but not declared in
    // this snippet — assumed provided elsewhere (e.g. @Slf4j and an injected
    // AlertService); confirm in the full class.

    /**
     * Records a new task in Redis (24h TTL) and hands the heavy work to the
     * async executor, returning immediately.
     *
     * @param taskId    caller-generated unique task ID
     * @param startDate report range start (inclusive)
     * @param endDate   report range end (inclusive)
     */
    public void submitAsyncReportTask(String taskId, LocalDate startDate, LocalDate endDate) {
        // 1. Record the task state in Redis so clients can poll it.
        ReportTask task = new ReportTask();
        task.setTaskId(taskId);
        task.setStartDate(startDate);
        task.setEndDate(endDate);
        task.setStatus("PROCESSING");
        task.setCreateTime(LocalDateTime.now());

        redisTemplate.opsForValue().set("report:task:" + taskId, task, Duration.ofHours(24));

        // 2. Run the actual work on the pool; failures are routed to the
        // error handler so the task never stays stuck in PROCESSING.
        asyncExecutor.submit(() -> {
            try {
                processAsyncReport(taskId, startDate, endDate);
            } catch (Exception e) {
                handleAsyncReportError(taskId, e);
            }
        });
    }

    /**
     * Does the actual report work on the async thread: segment-by-day fetch,
     * final aggregation, then marks the task COMPLETED with its result.
     */
    private void processAsyncReport(String taskId, LocalDate startDate, LocalDate endDate) {
        try {
            log.info("开始处理异步报表任务: {}, 时间范围: {} - {}", taskId, startDate, endDate);

            // 1. Process the range day by day to bound memory usage.
            List<OrderDailyReport> reports = getDailyReportBySegment(startDate, endDate);

            // 2. Fold the per-day reports into the final report.
            OrderReport finalReport = aggregateSegmentReports(reports);

            // 3. Publish the result for pollers.
            updateTaskStatus(taskId, "COMPLETED", finalReport);

            log.info("异步报表任务完成: {}", taskId);

        } catch (Exception e) {
            log.error("异步报表任务失败: {}", taskId, e);
            throw e; // rethrow so the submitter's catch marks the task FAILED
        }
    }

    /**
     * Marks a task FAILED and raises an alert.
     */
    private void handleAsyncReportError(String taskId, Exception e) {
        // BUG FIX: previously a brand-new ReportTask was written here, silently
        // discarding the startDate/endDate/createTime recorded at submission.
        // Update the existing entry instead; only fall back to a fresh object
        // if the Redis entry has already expired.
        ReportTask task = (ReportTask) redisTemplate.opsForValue().get("report:task:" + taskId);
        if (task == null) {
            task = new ReportTask();
            task.setTaskId(taskId);
        }
        task.setStatus("FAILED");
        task.setErrorMessage(e.getMessage());
        task.setUpdateTime(LocalDateTime.now());

        redisTemplate.opsForValue().set("report:task:" + taskId, task, Duration.ofHours(24));

        // Page someone — a silently failed report looks like a hung one.
        alertService.sendAlert("异步报表任务失败", "任务ID: " + taskId + ", 错误: " + e.getMessage());
    }

    /**
     * Updates an existing task's status/result in Redis, refreshing the 24h TTL.
     * No-op if the entry has already expired.
     */
    private void updateTaskStatus(String taskId, String status, OrderReport report) {
        ReportTask task = (ReportTask) redisTemplate.opsForValue().get("report:task:" + taskId);
        if (task != null) {
            task.setStatus(status);
            task.setUpdateTime(LocalDateTime.now());
            task.setResult(report);
            redisTemplate.opsForValue().set("report:task:" + taskId, task, Duration.ofHours(24));
        }
    }

    /**
     * Returns the current task state, or null if unknown/expired.
     */
    public ReportTask getTaskStatus(String taskId) {
        return (ReportTask) redisTemplate.opsForValue().get("report:task:" + taskId);
    }
}

6.3 任务状态查询接口

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
@RestController
@RequestMapping("/report")
public class ReportController {

/**
* 查询异步报表任务状态
*/
@GetMapping("/task/{taskId}")
public Result<ReportTask> getTaskStatus(@PathVariable String taskId) {
ReportTask task = reportService.getTaskStatus(taskId);

if (task == null) {
return Result.error("任务不存在或已过期");
}

return Result.success(task);
}

/**
* 下载异步报表结果
*/
@GetMapping("/task/{taskId}/download")
public ResponseEntity<Resource> downloadReport(@PathVariable String taskId) {
ReportTask task = reportService.getTaskStatus(taskId);

if (task == null || !"COMPLETED".equals(task.getStatus())) {
return ResponseEntity.notFound().build();
}

// 生成Excel文件
ByteArrayResource resource = reportService.generateExcelReport(task.getResult());

return ResponseEntity.ok()
.header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=report-" + taskId + ".xlsx")
.contentType(MediaType.APPLICATION_OCTET_STREAM)
.body(resource);
}
}

6.4 任务实体类

1
2
3
4
5
6
7
8
9
10
11
12
// State record for one async report task, stored in Redis under
// "report:task:{taskId}" with a 24h TTL and polled by clients.
@Data
public class ReportTask {
// Caller-generated unique task identifier.
private String taskId;
// Report range start (inclusive).
private LocalDate startDate;
// Report range end (inclusive).
private LocalDate endDate;
private String status; // PROCESSING, COMPLETED, FAILED
// Populated only when status is FAILED.
private String errorMessage;
private LocalDateTime createTime;
// Last status transition time.
private LocalDateTime updateTime;
// Populated only when status is COMPLETED.
private OrderReport result;
private int progress; // processing progress, in percent
}

6.5 线程池配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
@Configuration
@EnableAsync
public class AsyncConfig {

    /**
     * Dedicated pool for async report generation.
     * CallerRunsPolicy applies back-pressure: when the queue (100) and pool
     * (max 10) are saturated, the submitting thread runs the task itself
     * instead of dropping it.
     */
    @Bean("asyncExecutor")
    public ThreadPoolTaskExecutor asyncExecutor() {
        ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
        executor.setCorePoolSize(5);
        executor.setMaxPoolSize(10);
        executor.setQueueCapacity(100);
        executor.setKeepAliveSeconds(60);
        executor.setThreadNamePrefix("async-report-");
        executor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
        // IMPROVEMENT: without these, a redeploy kills in-flight report tasks,
        // leaving their Redis entries stuck in PROCESSING until the TTL expires.
        executor.setWaitForTasksToCompleteOnShutdown(true);
        executor.setAwaitTerminationSeconds(60);
        executor.initialize();
        return executor;
    }
}

7. 配置化管理

7.1 报表配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# application.yml
# FIX: the pasted snippet had lost all indentation, which makes the YAML
# invalid; hierarchy restored to match ReportRetentionConfig (prefix "report").
report:
  retention:
    daily:
      enabled: true
      days: 90        # keep daily reports for 90 days
    monthly:
      enabled: true
      months: 24      # keep monthly reports for 24 months
    yearly:
      enabled: true
      years: -1       # -1 = keep yearly reports forever

  cleanup:
    enabled: true
    schedule: "0 0 2 * * ?" # purge job runs daily at 02:00

7.2 配置类

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
// FIX: @ConfigurationProperties binding needs setters — the outer class had
// none (no @Data), so `retention`/`cleanup` would never be populated. Also,
// Retention referenced Monthly and Yearly, and the top level referenced
// Cleanup, but none of those nested classes existed — compile errors.
@Data
@Component
@ConfigurationProperties(prefix = "report")
public class ReportRetentionConfig {

    private Retention retention;
    private Cleanup cleanup;

    @Data
    public static class Retention {
        private Daily daily;
        private Monthly monthly;
        private Yearly yearly;
    }

    @Data
    public static class Daily {
        private boolean enabled;
        private int days;
    }

    @Data
    public static class Monthly {
        private boolean enabled;
        private int months;
    }

    @Data
    public static class Yearly {
        private boolean enabled;
        private int years; // -1 = keep forever
    }

    @Data
    public static class Cleanup {
        private boolean enabled;
        private String schedule; // cron expression for the purge job
    }
}

8. 性能提升效果

8.1 查询性能优化

  • 主键索引优势:比时间字段索引更高效
  • 避免全表扫描:ID范围查询能更好地利用索引
  • 减少锁竞争:按ID范围查询减少了锁的竞争
  • 支持分页优化:避免深度分页的性能问题

8.2 存储优化

  • 分层存储:不同精度的数据分别存储
  • 定期清理:避免数据无限膨胀
  • 压缩比例:聚合数据大幅减少存储空间

8.3 系统负载优化

  • 缓存命中率提升:热点数据缓存
  • 数据库压力减轻:减少实时计算
  • 响应时间优化:多级查询策略

9. 实施建议

9.1 优先级顺序

  1. 查询优先级:Redis缓存 → 聚合表 → 实时计算
  2. 数据完整性检查:检查聚合表中是否有完整的数据
  3. 部分数据缺失:采用混合查询策略
  4. 灵活降级:支持实时数据补充

9.2 监控指标

  • 查询响应时间
  • 缓存命中率
  • 聚合任务执行情况
  • 数据完整性检查

9.3 注意事项

  • 时间估算精度:雪花ID时间估算可能有误差
  • 数据一致性:确保聚合数据与原始数据一致
  • 异常处理:聚合任务失败时的处理机制
  • 容量规划:根据业务量合理规划存储和清理策略

10. 总结

通过基于雪花算法ID的报表优化方案,可以:

  • 显著提升查询性能:利用主键索引和范围查询
  • 减少系统负载:通过分层聚合和缓存策略
  • 提高可维护性:配置化管理和异步处理
  • 保证数据准确性:混合查询策略和数据完整性检查

这种方案特别适合于大数据量的报表查询场景,能够有效解决传统时间范围查询的性能瓶颈问题。