BI 报表查询优化笔记(基于雪花算法 ID)

1. 背景问题

报表查询的常见痛点

  • 时间范围查询慢:WHERE create_time BETWEEN '2024-01-01' AND '2024-01-31'
  • 深度分页性能差:LIMIT 100000, 20
  • 复杂聚合查询耗时:多维度统计和GROUP BY
  • 索引选择不优:时间字段索引效率不如主键索引

2. 雪花算法ID优化方案

2.1 雪花算法ID的特性

  • 时间递增性:包含时间戳信息,相近时间的ID数值相近
  • 范围查询友好:可以通过ID范围快速定位特定时间段的数据
  • 主键索引优势:基于主键索引,查询效率更高

2.2 时间范围转换为ID范围

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
/**
 * Converts a wall-clock time to the smallest snowflake ID that could have been
 * minted at that millisecond (worker/sequence bits zeroed), so callers can use
 * the result as a primary-key range bound.
 *
 * @param dateTime local time, interpreted as UTC+8
 *                 (NOTE(review): assumes the ID generator also ran in UTC+8 — confirm)
 * @return the estimated ID lower bound for {@code dateTime}
 * @throws IllegalArgumentException if {@code dateTime} precedes the snowflake epoch,
 *         which would yield a negative "ID" and silently break range scans
 */
private long timeToSnowflakeId(LocalDateTime dateTime) {
    long timestamp = dateTime.toInstant(ZoneOffset.ofHours(8)).toEpochMilli();
    long epoch = 1288834974657L; // Twitter snowflake epoch: 2010-11-04T01:42:54.657Z
    long snowflakeTimestamp = timestamp - epoch;
    if (snowflakeTimestamp < 0) {
        throw new IllegalArgumentException("dateTime precedes snowflake epoch: " + dateTime);
    }
    // Timestamp occupies the bits above the 22 worker+sequence bits.
    return snowflakeTimestamp << 22;
}

/**
 * Builds the monthly order report via a primary-key (snowflake ID) range scan
 * instead of a secondary-index scan on create_time.
 *
 * @param year  calendar year of the report
 * @param month calendar month (1-12)
 * @return aggregated report for the whole month
 */
public OrderMonthlyReport getMonthlyReport(int year, int month) {
    LocalDateTime startTime = LocalDateTime.of(year, month, 1, 0, 0, 0);
    LocalDateTime nextMonthStart = startTime.plusMonths(1);

    // BUG FIX: the previous upper bound (plusMonths(1).minusSeconds(1)) cut the
    // range off at 23:59:59.000, dropping every order created in the final
    // 999 ms of the month. Use "first possible ID of next month minus one" as
    // the inclusive upper bound so the whole month is covered exactly.
    long startId = timeToSnowflakeId(startTime);
    long endId = timeToSnowflakeId(nextMonthStart) - 1;

    // Primary-key range query (faster than filtering on a create_time index).
    // NOTE(review): assumes selectByIdRange treats both bounds as inclusive — confirm in mapper XML.
    List<OrderInfo> orders = orderInfoMapper.selectByIdRange(startId, endId);

    return aggregateOrderData(orders);
}

3. 数据聚合策略

3.1 分层聚合数据表

1
2
3
- 日报表(daily_report):保留最近3个月
- 月报表(monthly_report):保留最近2年
- 年报表(yearly_report):永久保留

3.2 定时聚合任务

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
@Component
public class ReportAggregator {

    /**
     * Aggregates yesterday's orders into a daily_report row.
     * Runs nightly at 01:00; idempotent — exits early if the row already exists.
     * NOTE(review): the mapper fields and timeToSnowflakeId are assumed to be
     * injected/defined elsewhere in this class (elided in these notes).
     */
    @Scheduled(cron = "0 0 1 * * ?") // every day at 01:00
    public void aggregateDailyReport() {
        LocalDate yesterday = LocalDate.now().minusDays(1);

        // Idempotency guard: a re-run (or overlapping scheduler fire) must not
        // produce duplicate report rows.
        DailyReport existingReport = dailyReportMapper.selectByDate(yesterday);
        if (existingReport != null) {
            return;
        }

        // BUG FIX: using next midnight's boundary ID itself as an inclusive
        // upper bound would also match an order minted at exactly 00:00:00.000
        // of the following day (worker/sequence bits zero), counting it in two
        // daily reports. Subtract one to keep the range exclusive of that ID.
        long startId = timeToSnowflakeId(yesterday.atStartOfDay());
        long endId = timeToSnowflakeId(yesterday.plusDays(1).atStartOfDay()) - 1;

        List<OrderInfo> orders = orderInfoMapper.selectByIdRange(startId, endId);
        DailyReport report = aggregateAndSave(orders, yesterday);

        dailyReportMapper.insert(report);
    }
}

3.3 数据保留策略

1
2
3
4
5
6
7
8
9
10
/**
 * Nightly retention sweep over the aggregate tables.
 * Daily reports are kept for 90 days, monthly reports for 24 months;
 * yearly reports are never deleted here.
 */
@Scheduled(cron = "0 0 2 * * ?") // every day at 02:00
public void cleanupExpiredData() {
    LocalDate today = LocalDate.now();

    // Drop daily rows past their 90-day window.
    dailyReportMapper.deleteByDateBefore(today.minusDays(90));

    // Drop monthly rows past their 24-month window.
    monthlyReportMapper.deleteByDateBefore(today.minusMonths(24));
}

4. 智能查询策略

4.1 多级缓存查询

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/**
 * Multi-level report lookup: Redis cache -> pre-aggregated table -> realtime
 * computation. Aggregate-table hits are cached for 1 hour; realtime results
 * are cached for only 10 minutes since they may still be changing.
 */
public OrderReport getOrderReport(LocalDate startDate, LocalDate endDate) {
    final String cacheKey = buildCacheKey(startDate, endDate);

    // Level 1: Redis.
    Object cached = redisTemplate.opsForValue().get(cacheKey);
    if (cached != null) {
        return (OrderReport) cached;
    }

    // Level 2: pre-aggregated table.
    OrderReport fromAggregate = getReportFromAggregateTable(startDate, endDate);
    if (fromAggregate != null) {
        redisTemplate.opsForValue().set(cacheKey, fromAggregate, Duration.ofHours(1));
        return fromAggregate;
    }

    // Level 3: compute on the fly — the most expensive path, cached briefly.
    OrderReport computed = calculateRealTimeReport(startDate, endDate);
    redisTemplate.opsForValue().set(cacheKey, computed, Duration.ofMinutes(10));
    return computed;
}

4.2 混合查询策略

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
/**
 * Smart lookup: serve entirely from the pre-aggregated daily table when every
 * day in [startDate, endDate] has a row, otherwise fall back to a hybrid path
 * that computes just the missing days in real time.
 */
public OrderReport getOrderReport(LocalDate startDate, LocalDate endDate) {
    // Pull whatever the aggregation job has already produced for the window.
    List<DailyReport> dailyReports = dailyReportMapper.selectByDateRange(startDate, endDate);

    Set<LocalDate> coveredDates = dailyReports.stream()
            .map(DailyReport::getReportDate)
            .collect(Collectors.toSet());

    // Completeness check: collect the days with no aggregate row.
    List<LocalDate> gaps = new ArrayList<>();
    for (LocalDate day : getAllDatesBetween(startDate, endDate)) {
        if (!coveredDates.contains(day)) {
            gaps.add(day);
        }
    }

    return gaps.isEmpty()
            ? aggregateFromDailyReports(dailyReports)   // fully covered — cheap path
            : supplementMissingData(dailyReports, gaps); // fill gaps with realtime data
}

5. 大数据量处理优化

5.1 分段查询

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
/**
 * Builds per-day reports for a large date range one day at a time, so at most
 * one day's orders are ever resident in memory.
 *
 * @param startDate first day (inclusive)
 * @param endDate   last day (inclusive)
 * @return one report per day in chronological order
 */
public List<OrderDailyReport> getDailyReportBySegment(LocalDate startDate, LocalDate endDate) {
    List<OrderDailyReport> reports = new ArrayList<>();

    LocalDate currentDate = startDate;
    while (!currentDate.isAfter(endDate)) {
        // BUG FIX: the old endId (next midnight's boundary ID) was also the
        // next day's startId, so an order minted at exactly midnight would be
        // counted in both adjacent days. Subtract one to make the day ranges
        // disjoint.
        long startId = timeToSnowflakeId(currentDate.atStartOfDay());
        long endId = timeToSnowflakeId(currentDate.plusDays(1).atStartOfDay()) - 1;

        List<OrderInfo> dayOrders = orderInfoMapper.selectByIdRange(startId, endId);
        reports.add(aggregateDayData(dayOrders, currentDate));

        currentDate = currentDate.plusDays(1);
    }

    return reports;
}

5.2 流式处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
/**
 * Streams a very large report as a sequence of bounded ID-range batches so the
 * full result set is never held in memory at once.
 *
 * @param request carries the report's start/end times
 */
public void processLargeReport(ReportRequest request) {
    long startId = timeToSnowflakeId(request.getStartTime());
    long endId = timeToSnowflakeId(request.getEndTime());

    // BUG FIX: the old code stepped through raw ID space in increments of
    // 10_000. Snowflake IDs keep their 22 low bits for worker/sequence, so
    // 10_000 raw IDs span only ~2.4 microseconds of wall time — for a one-day
    // range the loop would run tens of billions of iterations. Step by a fixed
    // time window expressed in ID units instead: one minute of wall time.
    // NOTE(review): if a single minute can hold too many orders, cap rows with
    // a LIMIT inside selectByIdRange as well.
    long batchSpan = 60_000L << 22; // 60 s in ms, shifted into the timestamp bits

    long currentId = startId;
    while (currentId < endId) {
        long batchEnd = Math.min(currentId + batchSpan - 1, endId);

        // Bounded primary-key range fetch.
        List<OrderInfo> batch = orderInfoMapper.selectByIdRange(currentId, batchEnd);

        processReportBatch(batch);

        currentId = batchEnd + 1;
    }
}

6. 异步处理方案

6.1 大报表异步处理

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
@RestController
@RequestMapping("/report")
public class ReportController {

// Entry point for report queries: ranges over 30 days are handed off to an
// async task; shorter ranges are computed synchronously in-request.
@GetMapping("/orders")
public Result<OrderReport> getOrderReport(
@RequestParam LocalDate startDate,
@RequestParam LocalDate endDate) {

// Ranges longer than 30 days are too slow to compute inline.
if (ChronoUnit.DAYS.between(startDate, endDate) > 30) {
return processLargeReportAsync(startDate, endDate);
}

// Small range: compute synchronously via the snowflake-ID range path.
OrderReport report = reportService.getOrderReportByIdRange(startDate, endDate);
return Result.success(report);
}

// Registers an async task and immediately returns its ID so the client can poll.
// NOTE(review): Result.success(...) is given a String here while the declared
// return type is Result<OrderReport> — this only compiles if Result.success
// accepts an arbitrary message type; confirm against the Result API, otherwise
// a dedicated "accepted" response type is needed.
private Result<OrderReport> processLargeReportAsync(LocalDate startDate, LocalDate endDate) {
String taskId = UUID.randomUUID().toString();
reportService.submitAsyncReportTask(taskId, startDate, endDate);

return Result.success("报表生成中,任务ID: " + taskId);
}
}

6.2 异步任务实现

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
@Service
public class ReportService {

    @Autowired
    private ThreadPoolTaskExecutor asyncExecutor;

    @Autowired
    private RedisTemplate<String, Object> redisTemplate;

    // NOTE(review): `log` and `alertService` are used below but not declared in
    // this snippet — assumed provided elsewhere (e.g. @Slf4j and an injected
    // AlertService); confirm in the full class.

    /**
     * Records a new task in Redis (24h TTL) and hands the heavy work to the
     * async executor, returning immediately.
     *
     * @param taskId    caller-generated unique task ID
     * @param startDate report range start (inclusive)
     * @param endDate   report range end (inclusive)
     */
    public void submitAsyncReportTask(String taskId, LocalDate startDate, LocalDate endDate) {
        // 1. Record the task state in Redis so clients can poll it.
        ReportTask task = new ReportTask();
        task.setTaskId(taskId);
        task.setStartDate(startDate);
        task.setEndDate(endDate);
        task.setStatus("PROCESSING");
        task.setCreateTime(LocalDateTime.now());

        redisTemplate.opsForValue().set("report:task:" + taskId, task, Duration.ofHours(24));

        // 2. Run the actual work on the pool; failures are routed to the
        // error handler so the task never stays stuck in PROCESSING.
        asyncExecutor.submit(() -> {
            try {
                processAsyncReport(taskId, startDate, endDate);
            } catch (Exception e) {
                handleAsyncReportError(taskId, e);
            }
        });
    }

    /**
     * Does the actual report work on the async thread: segment-by-day fetch,
     * final aggregation, then marks the task COMPLETED with its result.
     */
    private void processAsyncReport(String taskId, LocalDate startDate, LocalDate endDate) {
        try {
            log.info("开始处理异步报表任务: {}, 时间范围: {} - {}", taskId, startDate, endDate);

            // 1. Process the range day by day to bound memory usage.
            List<OrderDailyReport> reports = getDailyReportBySegment(startDate, endDate);

            // 2. Fold the per-day reports into the final report.
            OrderReport finalReport = aggregateSegmentReports(reports);

            // 3. Publish the result for pollers.
            updateTaskStatus(taskId, "COMPLETED", finalReport);

            log.info("异步报表任务完成: {}", taskId);

        } catch (Exception e) {
            log.error("异步报表任务失败: {}", taskId, e);
            throw e; // rethrow so the submitter's catch marks the task FAILED
        }
    }

    /**
     * Marks a task FAILED and raises an alert.
     */
    private void handleAsyncReportError(String taskId, Exception e) {
        // BUG FIX: previously a brand-new ReportTask was written here, silently
        // discarding the startDate/endDate/createTime recorded at submission.
        // Update the existing entry instead; only fall back to a fresh object
        // if the Redis entry has already expired.
        ReportTask task = (ReportTask) redisTemplate.opsForValue().get("report:task:" + taskId);
        if (task == null) {
            task = new ReportTask();
            task.setTaskId(taskId);
        }
        task.setStatus("FAILED");
        task.setErrorMessage(e.getMessage());
        task.setUpdateTime(LocalDateTime.now());

        redisTemplate.opsForValue().set("report:task:" + taskId, task, Duration.ofHours(24));

        // Page someone — a silently failed report looks like a hung one.
        alertService.sendAlert("异步报表任务失败", "任务ID: " + taskId + ", 错误: " + e.getMessage());
    }

    /**
     * Updates an existing task's status/result in Redis, refreshing the 24h TTL.
     * No-op if the entry has already expired.
     */
    private void updateTaskStatus(String taskId, String status, OrderReport report) {
        ReportTask task = (ReportTask) redisTemplate.opsForValue().get("report:task:" + taskId);
        if (task != null) {
            task.setStatus(status);
            task.setUpdateTime(LocalDateTime.now());
            task.setResult(report);
            redisTemplate.opsForValue().set("report:task:" + taskId, task, Duration.ofHours(24));
        }
    }

    /**
     * Returns the current task state, or null if unknown/expired.
     */
    public ReportTask getTaskStatus(String taskId) {
        return (ReportTask) redisTemplate.opsForValue().get("report:task:" + taskId);
    }
}

6.3 任务状态查询接口

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
@RestController
@RequestMapping("/report")
public class ReportController {

/**
* 查询异步报表任务状态
*/
@GetMapping("/task/{taskId}")
public Result<ReportTask> getTaskStatus(@PathVariable String taskId) {
ReportTask task = reportService.getTaskStatus(taskId);

if (task == null) {
return Result.error("任务不存在或已过期");
}

return Result.success(task);
}

/**
* 下载异步报表结果
*/
@GetMapping("/task/{taskId}/download")
public ResponseEntity<Resource> downloadReport(@PathVariable String taskId) {
ReportTask task = reportService.getTaskStatus(taskId);

if (task == null || !"COMPLETED".equals(task.getStatus())) {
return ResponseEntity.notFound().build();
}

// 生成Excel文件
ByteArrayResource resource = reportService.generateExcelReport(task.getResult());

return ResponseEntity.ok()
.header(HttpHeaders.CONTENT_DISPOSITION, "attachment; filename=report-" + taskId + ".xlsx")
.contentType(MediaType.APPLICATION_OCTET_STREAM)
.body(resource);
}
}

6.4 任务实体类

1
2
3
4
5
6
7
8
9
10
11
12
// State record for one async report task, stored in Redis under
// "report:task:{taskId}" with a 24h TTL and polled by clients.
@Data
public class ReportTask {
// Caller-generated unique task identifier.
private String taskId;
// Report range start (inclusive).
private LocalDate startDate;
// Report range end (inclusive).
private LocalDate endDate;
private String status; // PROCESSING, COMPLETED, FAILED
// Populated only when status is FAILED.
private String errorMessage;
private LocalDateTime createTime;
// Last status transition time.
private LocalDateTime updateTime;
// Populated only when status is COMPLETED.
private OrderReport result;
private int progress; // processing progress, in percent
}

6.5 线程池配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
@Configuration
@EnableAsync
public class AsyncConfig {

    /**
     * Dedicated pool for async report generation.
     * CallerRunsPolicy applies back-pressure: when the queue (100) and pool
     * (max 10) are saturated, the submitting thread runs the task itself
     * instead of dropping it.
     */
    @Bean("asyncExecutor")
    public ThreadPoolTaskExecutor asyncExecutor() {
        ThreadPoolTaskExecutor executor = new ThreadPoolTaskExecutor();
        executor.setCorePoolSize(5);
        executor.setMaxPoolSize(10);
        executor.setQueueCapacity(100);
        executor.setKeepAliveSeconds(60);
        executor.setThreadNamePrefix("async-report-");
        executor.setRejectedExecutionHandler(new ThreadPoolExecutor.CallerRunsPolicy());
        // IMPROVEMENT: without these, a redeploy kills in-flight report tasks,
        // leaving their Redis entries stuck in PROCESSING until the TTL expires.
        executor.setWaitForTasksToCompleteOnShutdown(true);
        executor.setAwaitTerminationSeconds(60);
        executor.initialize();
        return executor;
    }
}

7. 配置化管理

7.1 报表配置

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
# application.yml
# FIX: the pasted snippet had lost all indentation, which makes the YAML
# invalid; hierarchy restored to match ReportRetentionConfig (prefix "report").
report:
  retention:
    daily:
      enabled: true
      days: 90        # keep daily reports for 90 days
    monthly:
      enabled: true
      months: 24      # keep monthly reports for 24 months
    yearly:
      enabled: true
      years: -1       # -1 = keep yearly reports forever

  cleanup:
    enabled: true
    schedule: "0 0 2 * * ?" # purge job runs daily at 02:00

7.2 配置类

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
// FIX: @ConfigurationProperties binding needs setters — the outer class had
// none (no @Data), so `retention`/`cleanup` would never be populated. Also,
// Retention referenced Monthly and Yearly, and the top level referenced
// Cleanup, but none of those nested classes existed — compile errors.
@Data
@Component
@ConfigurationProperties(prefix = "report")
public class ReportRetentionConfig {

    private Retention retention;
    private Cleanup cleanup;

    @Data
    public static class Retention {
        private Daily daily;
        private Monthly monthly;
        private Yearly yearly;
    }

    @Data
    public static class Daily {
        private boolean enabled;
        private int days;
    }

    @Data
    public static class Monthly {
        private boolean enabled;
        private int months;
    }

    @Data
    public static class Yearly {
        private boolean enabled;
        private int years; // -1 = keep forever
    }

    @Data
    public static class Cleanup {
        private boolean enabled;
        private String schedule; // cron expression for the purge job
    }
}

8. 性能提升效果

8.1 查询性能优化

  • 主键索引优势:比时间字段索引更高效
  • 避免全表扫描:ID范围查询能更好地利用索引
  • 减少锁竞争:按ID范围查询减少了锁的竞争
  • 支持分页优化:避免深度分页的性能问题

8.2 存储优化

  • 分层存储:不同精度的数据分别存储
  • 定期清理:避免数据无限膨胀
  • 压缩比例:聚合数据大幅减少存储空间

8.3 系统负载优化

  • 缓存命中率提升:热点数据缓存
  • 数据库压力减轻:减少实时计算
  • 响应时间优化:多级查询策略

9. 实施建议

9.1 优先级顺序

  1. 查询优先级:Redis缓存 → 聚合表 → 实时计算
  2. 数据完整性检查:检查聚合表中是否有完整的数据
  3. 部分数据缺失:采用混合查询策略
  4. 灵活降级:支持实时数据补充

9.2 监控指标

  • 查询响应时间
  • 缓存命中率
  • 聚合任务执行情况
  • 数据完整性检查

9.3 注意事项

  • 时间估算精度:雪花ID时间估算可能有误差
  • 数据一致性:确保聚合数据与原始数据一致
  • 异常处理:聚合任务失败时的处理机制
  • 容量规划:根据业务量合理规划存储和清理策略

10. 总结

通过基于雪花算法ID的报表优化方案,可以:

  • 显著提升查询性能:利用主键索引和范围查询
  • 减少系统负载:通过分层聚合和缓存策略
  • 提高可维护性:配置化管理和异步处理
  • 保证数据准确性:混合查询策略和数据完整性检查

这种方案特别适合于大数据量的报表查询场景,能够有效解决传统时间范围查询的性能瓶颈问题。