Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fast insert eav #81

Merged
merged 8 commits into from
Sep 3, 2024
Merged
Show file tree
Hide file tree
Changes from 6 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
11 changes: 7 additions & 4 deletions AnalysisData/AnalysisData/ConfigService.cs
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,8 @@
using AnalysisData.EAV.Service.GraphServices.NodeAndEdgeServices;
using AnalysisData.EAV.Service.GraphServices.Relationship;
using AnalysisData.EAV.Service.GraphSevices;
using AnalysisData.Graph.Service.ServiceBusiness;
using AnalysisData.Graph.Service.ServiceBusiness.Abstraction;
using AnalysisData.JwtService.abstractions;
using AnalysisData.Repository.RoleRepository;
using AnalysisData.Repository.RoleRepository.Abstraction;
Expand Down Expand Up @@ -56,12 +58,11 @@ public static IServiceCollection AddServices(this IServiceCollection services)
services.AddScoped<ICookieService, CookieService.CookieService>();
services.AddScoped<IPermissionService, PermissionService>();
services.AddScoped<IValidationService, ValidationService>();
services.AddScoped<IEdgeToDbService, EdgeToDbService>();
services.AddScoped<INodeToDbService, NodeToDbService>();
services.AddScoped<ICsvReaderService, CsvReaderService>();
services.AddScoped<IEdgeRecordProcessor, EdgeRecordProcessor>();
services.AddScoped<IHeaderProcessor, HeaderProcessor>();
services.AddScoped<INodeRecordProcessor, NodeRecordProcessor>();
services.AddScoped<INodeRecordProcessor, EntityNodeRecordProcessor>();
services.AddScoped<IFromToProcessor, FromToProcessor>();
services.AddScoped<INodePaginationService, NodePaginationService>();
services.AddScoped<IAdminService, AdminService>();
Expand All @@ -77,8 +78,10 @@ public static IServiceCollection AddServices(this IServiceCollection services)
services.AddScoped<IPasswordHasher, PasswordHasher>();
services.AddScoped<IS3FileStorageService, S3FileStorageService>();
services.AddScoped<IUploadImageService, UploadImageService>();


services.AddScoped<IValueNodeProcessor, ValueNodeProcessor>();
services.AddScoped<IEntityEdgeRecordProcessor, EntityEdgeRecordProcessor>();
services.AddScoped<IValueEdgeProcessor, ValueEdgeProcessor>();
services.AddScoped<IEdgeToDbService, EdgeToDbService>();
return services;
}
}
16 changes: 13 additions & 3 deletions AnalysisData/AnalysisData/Graph/Model/Edge/EntityEdge.cs
Original file line number Diff line number Diff line change
@@ -1,12 +1,22 @@
using System.ComponentModel.DataAnnotations;
using System.ComponentModel.DataAnnotations.Schema;

namespace AnalysisData.EAV.Model;

public class EntityEdge
{
[Key] public int Id { get; set; }
[Key]
public int Id { get; set; }

[Required] public int EntityIDSource { get; set; }
[Required]
public int EntityIDSource { get; set; }

[Required] public int EntityIDTarget { get; set; }
[ForeignKey("EntityIDSource")]
public EntityNode SourceNode { get; set; }

[Required]
public int EntityIDTarget { get; set; }

[ForeignKey("EntityIDTarget")]
public EntityNode TargetNode { get; set; }
}
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ namespace AnalysisData.EAV.Repository.EdgeRepository.Abstraction;
public interface IAttributeEdgeRepository
{
Task AddAsync(AttributeEdge entity);
Task AddRangeAsync(IEnumerable<AttributeEdge> attributeEdges);
Task<IEnumerable<AttributeEdge>> GetAllAsync();
Task<AttributeEdge> GetByIdAsync(int id);
Task<AttributeEdge> GetByNameAsync(string name);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ namespace AnalysisData.EAV.Repository.EdgeRepository.Abstraction;
public interface IEntityEdgeRepository
{
Task AddAsync(EntityEdge entity);
Task AddRangeAsync(IEnumerable<EntityEdge> entityEdges);
Task<List<EntityEdge>> FindNodeLoopsAsync(int id);
Task<IEnumerable<EntityEdge>> GetAllAsync();
Task<EntityEdge> GetByIdAsync(int id);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ namespace AnalysisData.EAV.Repository.EdgeRepository.Abstraction;
public interface IValueEdgeRepository
{
Task AddAsync(ValueEdge entity);
Task AddRangeAsync(IEnumerable<ValueEdge> valueEdges);
Task<IEnumerable<ValueEdge>> GetAllAsync();
Task<ValueEdge> GetByIdAsync(int id);
Task DeleteAsync(int id);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,11 @@ public async Task AddAsync(AttributeEdge entity)
await _context.AttributeEdges.AddAsync(entity);
await _context.SaveChangesAsync();
}

public async Task AddRangeAsync(IEnumerable<AttributeEdge> attributeEdges)
{
await _context.AttributeEdges.AddRangeAsync(attributeEdges);
await _context.SaveChangesAsync();
}
public async Task<IEnumerable<AttributeEdge>> GetAllAsync()
{
return await _context.AttributeEdges.ToListAsync();
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@ public async Task<IEnumerable<EntityEdge>> GetAllAsync()
{
return await _context.EntityEdges.ToListAsync();
}

public async Task AddRangeAsync(IEnumerable<EntityEdge> entityEdges)
{
await _context.EntityEdges.AddRangeAsync(entityEdges);
await _context.SaveChangesAsync();
}
public async Task<EntityEdge> GetByIdAsync(int id)
{
return await _context.EntityEdges.FindAsync(id);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,11 @@ public async Task<IEnumerable<ValueEdge>> GetAllAsync()
{
return await _context.ValueEdges.ToListAsync();
}

public async Task AddRangeAsync(IEnumerable<ValueEdge> valueEdges)
{
await _context.ValueEdges.AddRangeAsync(valueEdges);
await _context.SaveChangesAsync();
}
public async Task<ValueEdge> GetByIdAsync(int id)
{
return await _context.ValueEdges.FindAsync(id);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ namespace AnalysisData.EAV.Repository.NodeRepository.Abstraction;

public interface IEntityNodeRepository
{
Task AddRangeAsync(IEnumerable<EntityNode> entityNodes);
Task AddAsync(EntityNode entity);
Task<IEnumerable<EntityNode>> GetAllAsync();
Task<EntityNode> GetByNameAsync(string id);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@ namespace AnalysisData.EAV.Repository.NodeRepository.Abstraction;

public interface IValueNodeRepository
{
Task AddRangeAsync(IEnumerable<ValueNode> valueNodes);
Task AddAsync(ValueNode entity);
Task<IEnumerable<ValueNode>> GetAllAsync();
Task<ValueNode> GetByIdAsync(int id);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,7 +29,11 @@ public async Task<EntityNode> GetByNameAsync(string name)
{
return await _context.EntityNodes.FirstOrDefaultAsync(x => x.Name == name);
}

public async Task AddRangeAsync(IEnumerable<EntityNode> entityNodes)
{
await _context.EntityNodes.AddRangeAsync(entityNodes);
await _context.SaveChangesAsync();
}
public async Task<EntityNode> GetByIdAsync(int id)
{
return await _context.EntityNodes.FirstOrDefaultAsync(x => x.Id == id);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,11 @@ public async Task AddAsync(ValueNode entity)
await _context.SaveChangesAsync();
}

public async Task AddRangeAsync(IEnumerable<ValueNode> valueNodes)
{
await _context.ValueNodes.AddRangeAsync(valueNodes);
await _context.SaveChangesAsync();
}
public async Task<IEnumerable<ValueNode>> GetAllAsync()
{
return await _context.ValueNodes.ToListAsync();
Expand Down
Original file line number Diff line number Diff line change
@@ -1,11 +1,9 @@
using AnalysisData.EAV.Dto;
using AnalysisData.EAV.Model;
using AnalysisData.EAV.Repository.Abstraction;
using AnalysisData.EAV.Repository.CategoryRepository.asbtraction;
using AnalysisData.EAV.Repository.FileUploadedRepository;
using AnalysisData.EAV.Service.Abstraction;
using AnalysisData.Exception;
using CsvHelper.TypeConversion;

namespace AnalysisData.EAV.Service;

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
using AnalysisData.EAV.Model;
using CsvHelper;

namespace AnalysisData.Graph.Service.ServiceBusiness.Abstraction;

public interface IEntityEdgeRecordProcessor
{
Task<IEnumerable<EntityEdge>> ProcessEntityEdgesAsync(CsvReader csv, string fromId, string toId);
}
Original file line number Diff line number Diff line change
@@ -1,8 +1,10 @@
using CsvHelper;
using AnalysisData.EAV.Model;
using CsvHelper;

namespace AnalysisData.EAV.Service.Business.Abstraction;

public interface INodeRecordProcessor
{
Task ProcessRecordsAsync(CsvReader csv, IEnumerable<string> headers, string id, int fileId);
Task<IEnumerable<EntityNode>> ProcessEntityNodesAsync(CsvReader csv, IEnumerable<string> headers, string id,
int fileId);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
using AnalysisData.EAV.Model;
using CsvHelper;

namespace AnalysisData.Graph.Service.ServiceBusiness.Abstraction;

public interface IValueEdgeProcessor
{
Task ProcessEntityEdgeValuesAsync(
CsvReader csv,
IEnumerable<string> headers,
string from,
string to,
IEnumerable<EntityEdge> entityEdges);
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
using AnalysisData.EAV.Model;
using CsvHelper;

namespace AnalysisData.Graph.Service.ServiceBusiness.Abstraction;

public interface IValueNodeProcessor
{
Task ProcessValueNodesAsync(CsvReader csv, IEnumerable<EntityNode> items, IEnumerable<string> headers, string id);
}
Original file line number Diff line number Diff line change
Expand Up @@ -11,17 +11,21 @@ public class CsvReaderService : ICsvReaderService
public CsvReader CreateCsvReader(IFormFile file)
{
var reader = new StreamReader(file.OpenReadStream());
return new CsvReader(reader, new CsvConfiguration(CultureInfo.InvariantCulture)
var config = new CsvConfiguration(CultureInfo.InvariantCulture)
{
Encoding = Encoding.UTF8,
HasHeaderRecord = true
});
};
return new CsvReader(reader, config);
}

public IEnumerable<string> ReadHeaders(CsvReader csv)
{
csv.Read();
csv.ReadHeader();
return csv.Context.Reader.HeaderRecord;
if (csv.Read())
{
csv.ReadHeader();
return csv.Context.Reader.HeaderRecord ?? Enumerable.Empty<string>();
}
return Enumerable.Empty<string>();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -56,8 +56,7 @@ private async Task<EntityEdge> CreateEntityEdgeAsync(string fromId, string toId)
return entityEdge;
}

private async Task ProcessValuesAsync(CsvReader csv, IEnumerable<string> headers, string from, string to,
EntityEdge entityEdge)
private async Task ProcessValuesAsync(CsvReader csv, IEnumerable<string> headers, string from, string to, EntityEdge entityEdge)
{
foreach (var header in headers)
{
Expand Down
Original file line number Diff line number Diff line change
@@ -1,27 +1,36 @@
using AnalysisData.EAV.Service.Abstraction;
using AnalysisData.EAV.Service.Business.Abstraction;
using AnalysisData.Graph.Service.ServiceBusiness.Abstraction;

namespace AnalysisData.EAV.Service;

public class EdgeToDbService : IEdgeToDbService
{
private readonly IEdgeRecordProcessor _edgeRecordProcessor;
private readonly IValueEdgeProcessor _valueEdgeProcessor;
private readonly IEntityEdgeRecordProcessor _entityEdgeRecordProcessor;
private readonly IFromToProcessor _fromToProcessor;
private readonly ICsvReaderService _csvReaderService;

public EdgeToDbService(IEdgeRecordProcessor edgeRecordProcessor, IFromToProcessor fromToProcessor,
ICsvReaderService csvReaderService)
public EdgeToDbService(IFromToProcessor fromToProcessor,
ICsvReaderService csvReaderService, IEntityEdgeRecordProcessor entityEdgeRecordProcessor, IValueEdgeProcessor valueEdgeProcessor)
{
_edgeRecordProcessor = edgeRecordProcessor;
_fromToProcessor = fromToProcessor;
_csvReaderService = csvReaderService;
_entityEdgeRecordProcessor = entityEdgeRecordProcessor;
_valueEdgeProcessor = valueEdgeProcessor;
}

public async Task ProcessCsvFileAsync(IFormFile file, string from, string to)
{
var csv = _csvReaderService.CreateCsvReader(file);
var headers = _csvReaderService.ReadHeaders(csv);

await _fromToProcessor.ProcessFromToAsync(headers, from, to);
await _edgeRecordProcessor.ProcessRecordsAsync(csv, headers, from, to);
var entityEdges = await _entityEdgeRecordProcessor.ProcessEntityEdgesAsync(csv, from, to);

csv = _csvReaderService.CreateCsvReader(file);
headers = _csvReaderService.ReadHeaders(csv);

await _valueEdgeProcessor.ProcessEntityEdgeValuesAsync(csv, headers, from, to, entityEdges);
}
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
using AnalysisData.EAV.Model;
using AnalysisData.EAV.Repository.EdgeRepository.Abstraction;
using AnalysisData.EAV.Repository.NodeRepository.Abstraction;
using AnalysisData.Graph.Service.ServiceBusiness.Abstraction;
using CsvHelper;

namespace AnalysisData.Graph.Service.ServiceBusiness;

public class EntityEdgeRecordProcessor : IEntityEdgeRecordProcessor
{
private readonly IEntityNodeRepository _entityNodeRepository;
private readonly IEntityEdgeRepository _entityEdgeRepository;
private readonly int _batchSize;

public EntityEdgeRecordProcessor(IEntityNodeRepository entityNodeRepository, IEntityEdgeRepository entityEdgeRepository, int batchSize = 1000)
{
_entityNodeRepository = entityNodeRepository;
_entityEdgeRepository = entityEdgeRepository;

_batchSize = batchSize;
}

public async Task<IEnumerable<EntityEdge>> ProcessEntityEdgesAsync(CsvReader csv, string from, string to)
{
var entityEdges = new List<EntityEdge>();
var batch = new List<EntityEdge>();
while (csv.Read())
{
var entityFrom = csv.GetField(from);
var entityTo = csv.GetField(to);
var fromNode = await _entityNodeRepository.GetByNameAsync(entityFrom);
var toNode = await _entityNodeRepository.GetByNameAsync(entityTo);
if (fromNode is null || toNode is null)
{
return null;
}

var entityEdge = new EntityEdge
{ EntityIDSource = fromNode.Id, EntityIDTarget = toNode.Id };

entityEdges.Add(entityEdge);
batch.Add(entityEdge);

if (batch.Count >= _batchSize)
{
await InsertBatchAsync(batch);
batch.Clear();
}

}
if (batch.Any())
{
await InsertBatchAsync(batch);
}
return entityEdges;
}
private async Task InsertBatchAsync(IEnumerable<EntityEdge> batch)
{
await _entityEdgeRepository.AddRangeAsync(batch);
}
}
Loading