Skip to content

Commit

Permalink
支持QA问答解析
Browse files Browse the repository at this point in the history
  • Loading branch information
239573049 committed Apr 16, 2024
1 parent f65b01c commit add4964
Show file tree
Hide file tree
Showing 14 changed files with 1,223 additions and 78 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -19,4 +19,6 @@ public sealed class CreateWikiDetailsInput
public ProcessMode Mode { get; set; } = ProcessMode.Auto;

public TrainingPattern TrainingPattern { get; set; } = TrainingPattern.Subsection;

public string? QAPromptTemplate { get; set; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -48,33 +48,32 @@ public async Task CreateWikiDetailsAsync(CreateWikiDetailsCommand command)
{
    // Build the entity with the caller-selected processing options so they are
    // persisted together with the record (constructor arguments: wikiId, fileName,
    // path, fileId, dataCount = 0, type = "file").
    var wikiDetail = new WikiDetail(command.Input.WikiId, command.Input.Name, command.Input.FilePath,
        command.Input.FileId, 0, "file")
    {
        TrainingPattern = command.Input.TrainingPattern,
        Mode = command.Input.Mode,
        MaxTokensPerLine = command.Input.MaxTokensPerLine,
        MaxTokensPerParagraph = command.Input.MaxTokensPerParagraph,
        OverlappingTokens = command.Input.OverlappingTokens,
    };

    // The input's QAPromptTemplate is nullable and has no default. Only override the
    // entity's built-in template when the caller actually supplied one; assigning it
    // unconditionally would replace the default prompt with null.
    if (!string.IsNullOrWhiteSpace(command.Input.QAPromptTemplate))
    {
        wikiDetail.QAPromptTemplate = command.Input.QAPromptTemplate;
    }

    wikiDetail = await wikiRepository.AddDetailsAsync(wikiDetail);

    // Queue the persisted entity exactly once. The earlier mapping to
    // QuantizeWikiDetail and its separate enqueue are gone: the entity now carries
    // every option itself, and keeping both calls queued the same item twice.
    await QuantizeBackgroundService.AddWikiDetailAsync(wikiDetail);
}

/// <summary>
/// Handles <see cref="CreateWikiDetailWebPageCommand"/>: creates a wiki detail of type
/// "web" from the command input, stores it through the wiki repository and queues it
/// on <see cref="QuantizeBackgroundService"/>.
/// </summary>
/// <param name="command">Command whose <c>Input</c> supplies the wiki id, name, path and processing options.</param>
[EventHandler]
public async Task CreateWikiDetailWebPageAsync(CreateWikiDetailWebPageCommand command)
{
    // Constructor arguments: wikiId, fileName, path, fileId = -1, dataCount = 0, type = "web".
    // NOTE(review): -1 presumably means "no uploaded file backs this entry" — confirm.
    var wikiDetail = new WikiDetail(command.Input.WikiId, command.Input.Name, command.Input.Path,
        -1, 0, "web")
    {
        OverlappingTokens = command.Input.OverlappingTokens,
        MaxTokensPerLine = command.Input.MaxTokensPerLine,
        MaxTokensPerParagraph = command.Input.MaxTokensPerParagraph,
        Mode = command.Input.Mode,
        TrainingPattern = command.Input.TrainingPattern,
    };

    wikiDetail = await wikiRepository.AddDetailsAsync(wikiDetail);

    // Single declaration of the queued item. The previous QuantizeWikiDetail mapping
    // and its option copies declared the same local a second time and only repeated
    // values the entity already holds.
    var quantizeWikiDetail = mapper.Map<WikiDetail>(wikiDetail);

    await QuantizeBackgroundService.AddWikiDetailAsync(quantizeWikiDetail);
}
Expand All @@ -85,14 +84,15 @@ public async Task CreateWikiDetailDataAsync(CreateWikiDetailDataCommand command)
var wikiDetail = new WikiDetail(command.Input.WikiId, command.Input.Name, command.Input.FilePath,
command.Input.FileId, 0, "data");

wikiDetail.OverlappingTokens = command.Input.OverlappingTokens;
wikiDetail.MaxTokensPerLine = command.Input.MaxTokensPerLine;
wikiDetail.MaxTokensPerParagraph = command.Input.MaxTokensPerParagraph;
wikiDetail.Mode = command.Input.Mode;
wikiDetail.TrainingPattern = command.Input.TrainingPattern;

wikiDetail = await wikiRepository.AddDetailsAsync(wikiDetail);

var quantizeWikiDetail = mapper.Map<QuantizeWikiDetail>(wikiDetail);
quantizeWikiDetail.OverlappingTokens = command.Input.OverlappingTokens;
quantizeWikiDetail.MaxTokensPerLine = command.Input.MaxTokensPerLine;
quantizeWikiDetail.MaxTokensPerParagraph = command.Input.MaxTokensPerParagraph;
quantizeWikiDetail.Mode = command.Input.Mode;
quantizeWikiDetail.TrainingPattern = command.Input.TrainingPattern;
var quantizeWikiDetail = mapper.Map<WikiDetail>(wikiDetail);

await QuantizeBackgroundService.AddWikiDetailAsync(quantizeWikiDetail);
}
Expand Down Expand Up @@ -143,18 +143,7 @@ public async Task RetryVectorDetailAsync(RetryVectorDetailCommand command)
throw new UserFriendlyException("未找到数据");
}

await QuantizeBackgroundService.AddWikiDetailAsync(new QuantizeWikiDetail()
{
Path = wikiDetail.Path,
WikiId = wikiDetail.WikiId,
TrainingPattern = TrainingPattern.Subsection,
FileName = wikiDetail.FileName,
Type = wikiDetail.Type,
MaxTokensPerParagraph = 1000,
MaxTokensPerLine = 300,
OverlappingTokens = 100,
FileId = wikiDetail.FileId,
});
await QuantizeBackgroundService.AddWikiDetailAsync(wikiDetail);
}

[EventHandler]
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,8 @@
using System.Collections.Concurrent;
using System.Threading.Channels;
using FastWiki.Service.Infrastructure.KM;
using FastWiki.Service.Service;
using Microsoft.KernelMemory.Handlers;

namespace FastWiki.Service.Backgrounds;

Expand All @@ -8,6 +11,11 @@ namespace FastWiki.Service.Backgrounds;
/// </summary>
public sealed class QuantizeBackgroundService : BackgroundService
{
/// <summary>
/// Thread-safe dictionary of the wiki details currently being handled, keyed by the
/// detail's id rendered as a string. Each value pairs the detail with the wiki that was
/// looked up for it. The handler adds the entry before processing starts and removes it
/// in its <c>finally</c> block.
/// </summary>
public static ConcurrentDictionary<string, (WikiDetail, Wiki)> CacheWikiDetails { get; } = new();

private readonly IServiceProvider _serviceProvider;

/// <summary>
Expand All @@ -20,7 +28,7 @@ public sealed class QuantizeBackgroundService : BackgroundService
/// </summary>
private static int _maxTask = 1;

private static readonly Channel<QuantizeWikiDetail> WikiDetails = Channel.CreateBounded<QuantizeWikiDetail>(
private static readonly Channel<WikiDetail> WikiDetails = Channel.CreateBounded<WikiDetail>(
new BoundedChannelOptions(1000)
{
SingleReader = true,
Expand Down Expand Up @@ -69,7 +77,7 @@ protected override async Task ExecuteAsync(CancellationToken stoppingToken)
///
/// </summary>
/// <param name="wikiDetail"></param>
public static async Task AddWikiDetailAsync(WikiDetail wikiDetail)
{
    // Single entry point for queuing work: the parameter is the base WikiDetail type,
    // so callers holding a derived instance can still pass it unchanged.
    await WikiDetails.Writer.WriteAsync(wikiDetail);
}
Expand All @@ -92,13 +100,16 @@ private async Task WikiDetailHandlerAsync()
/// </summary>
/// <param name="wikiDetail"></param>
/// <param name="service"></param>
private static async ValueTask HandlerAsync(QuantizeWikiDetail wikiDetail, IServiceProvider service)
private static async ValueTask HandlerAsync(WikiDetail wikiDetail, IServiceProvider service)
{
var fileStorageRepository = service.GetRequiredService<IFileStorageRepository>();
var wikiRepository = service.GetRequiredService<IWikiRepository>();
var wikiMemoryService = service.GetRequiredService<WikiMemoryService>();

var wiki = await wikiRepository.FindAsync(x => x.Id == wikiDetail.WikiId);

CacheWikiDetails.TryAdd(wikiDetail.Id.ToString(), new ValueTuple<WikiDetail, Wiki>(wikiDetail, wiki));

if (wikiDetail.Mode == ProcessMode.Auto)
{
wikiDetail.MaxTokensPerLine = 300;
Expand All @@ -113,6 +124,19 @@ private static async ValueTask HandlerAsync(QuantizeWikiDetail wikiDetail, IServ
try
{
Console.WriteLine($"开始量化:ʼ{wikiDetail.FileName} {wikiDetail.Path} {wikiDetail.FileId}");
List<string> step = new List<string>();
if (wikiDetail.TrainingPattern == TrainingPattern.QA)
{
var stepName = wikiDetail.Id.ToString();
serverless.Orchestrator.AddHandler<TextExtractionHandler>("extract_text");
serverless.Orchestrator.AddHandler<QAHandler>(stepName);
serverless.Orchestrator.AddHandler<GenerateEmbeddingsHandler>("generate_embeddings");
serverless.Orchestrator.AddHandler<SaveRecordsHandler>("save_memory_records");
step.Add("extract_text");
step.Add(stepName);
step.Add("generate_embeddings");
step.Add("save_memory_records");
}

string result = string.Empty;
if (wikiDetail.Type == "file")
Expand All @@ -132,7 +156,7 @@ private static async ValueTask HandlerAsync(QuantizeWikiDetail wikiDetail, IServ
{
"wikiDetailId", wikiDetail.Id.ToString()
}
}, "wiki");
}, "wiki", steps: step.ToArray());
}
else if (wikiDetail.Type == "web")
{
Expand All @@ -146,7 +170,7 @@ private static async ValueTask HandlerAsync(QuantizeWikiDetail wikiDetail, IServ
{
"wikiDetailId", wikiDetail.Id.ToString()
}
}, "wiki");
}, "wiki", steps: step.ToArray());
}
else if (wikiDetail.Type == "data")
{
Expand All @@ -160,7 +184,7 @@ private static async ValueTask HandlerAsync(QuantizeWikiDetail wikiDetail, IServ
{
"wikiDetailId", wikiDetail.Id.ToString()
}
}, "wiki");
}, "wiki", steps: step.ToArray());
}

await wikiRepository.UpdateDetailsState(wikiDetail.Id, WikiQuantizationState.Accomplish);
Expand All @@ -176,8 +200,13 @@ private static async ValueTask HandlerAsync(QuantizeWikiDetail wikiDetail, IServ
await wikiRepository.UpdateDetailsState(wikiDetail.Id, WikiQuantizationState.Fail);
}
}
finally
{
CacheWikiDetails.Remove(wikiDetail.Id.ToString(), out _);
}
}


private async Task LoadingWikiDetailAsync()
{
using var asyncServiceScope = _serviceProvider.CreateScope();
Expand All @@ -186,20 +215,7 @@ private async Task LoadingWikiDetailAsync()
var mapper = asyncServiceScope.ServiceProvider.GetRequiredService<IMapper>();
foreach (var wikiDetail in await wikiRepository.GetFailedDetailsAsync())
{
await AddWikiDetailAsync(mapper.Map<QuantizeWikiDetail>(wikiDetail));
await AddWikiDetailAsync(wikiDetail);
}
}
}

/// <summary>
/// A <see cref="WikiDetail"/> extended with the processing options used when the item
/// is queued on <see cref="QuantizeBackgroundService"/>.
/// </summary>
/// <remarks>
/// NOTE(review): <see cref="WikiDetail"/> as shown alongside this type declares
/// properties with the same names. If both sets remain, the ones below hide the base
/// members — confirm whether this subclass is still needed.
/// </remarks>
public sealed class QuantizeWikiDetail : WikiDetail
{
/// <summary>Token limit per paragraph. NOTE(review): meaning taken from the name — confirm against the splitter.</summary>
public int MaxTokensPerParagraph { get; set; }

/// <summary>Token limit per line. NOTE(review): meaning taken from the name — confirm against the splitter.</summary>
public int MaxTokensPerLine { get; set; }

/// <summary>Number of overlapping tokens. NOTE(review): presumably the overlap between adjacent chunks — confirm.</summary>
public int OverlappingTokens { get; set; }

/// <summary>Processing mode; defaults to <see cref="ProcessMode.Auto"/>.</summary>
public ProcessMode Mode { get; set; } = ProcessMode.Auto;

/// <summary>Training pattern; defaults to <see cref="TrainingPattern.Subsection"/>.</summary>
public TrainingPattern TrainingPattern { get; set; } = TrainingPattern.Subsection;
}
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,9 @@ public class WikiDetail : Entity<long>, IAuditEntity<long>
/// </summary>
public string Type { get; set; }

/// <summary>
/// Quantization state of this knowledge-base entry.
/// </summary>
public WikiQuantizationState State { get; set; }

public long Creator { get; set; }
Expand All @@ -47,6 +50,37 @@ public class WikiDetail : Entity<long>, IAuditEntity<long>

public DateTime ModificationTime { get; set; }


/// <summary>Token limit per paragraph. NOTE(review): exact use by the text splitter is not visible here — confirm.</summary>
public int MaxTokensPerParagraph { get; set; }

/// <summary>Token limit per line. The background handler sets this to 300 when <see cref="Mode"/> is <see cref="ProcessMode.Auto"/>.</summary>
public int MaxTokensPerLine { get; set; }

/// <summary>Number of overlapping tokens. NOTE(review): presumably the overlap between adjacent chunks — confirm.</summary>
public int OverlappingTokens { get; set; }

/// <summary>Processing mode; defaults to <see cref="ProcessMode.Auto"/>.</summary>
public ProcessMode Mode { get; set; } = ProcessMode.Auto;

/// <summary>Training pattern; defaults to <see cref="TrainingPattern.Subsection"/>.</summary>
public TrainingPattern TrainingPattern { get; set; } = TrainingPattern.Subsection;

/// <summary>
/// Prompt template for QA training. The default text (in Chinese) asks for at most 20
/// questions with detailed answers, returned as <c>Q1:</c>/<c>A1:</c> pairs, and
/// contains the <c>{{$input}}</c> placeholder where the source text goes.
/// </summary>
/// <remarks>
/// The literal is delimited by four quotes because the template itself contains a
/// triple-quote sequence around the placeholder.
/// </remarks>
public string? QAPromptTemplate { get; set; } =
""""
我会给你一段文本,学习它们,并整理学习成果,要求为:
1. 提出最多 20 个问题。
2. 给出每个问题的答案。
3. 答案要详细完整,答案可以包含普通文字、链接、代码、表格、公式、媒体链接等 markdown 元素。
4. 按格式返回多个问题和答案:
Q1: 问题。
A1: 答案。
Q2:
A2:
……
我的文本:"""{{$input}}"""
"""";

/// <inheritdoc />
public WikiDetail(long wikiId, string fileName, string path, long fileId, int dataCount, string type)
{
Expand All @@ -61,6 +95,5 @@ public WikiDetail(long wikiId, string fileName, string path, long fileId, int da

protected WikiDetail()
{

}
}
Loading

0 comments on commit add4964

Please sign in to comment.