Skip to content

Commit

Permalink
enhanced document model response filter
Browse files Browse the repository at this point in the history
  • Loading branch information
kadenbking committed Feb 7, 2024
1 parent d35bc96 commit 975b7db
Show file tree
Hide file tree
Showing 9 changed files with 210 additions and 108 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,8 @@ public async Task<IActionResult> AnalyzeDocument([FromForm] IFormFile file)

var analysis = await _contractAnalysisService.AnalyzeDocument(file, "prebuilt-document");
var result = _contractAnalysisService.FormatDocumentAnalysis(analysis);
await SaveJsonFile(documentResult: result);

// await SaveJsonFile(documentResult: result);
return Ok(result);
}

Expand All @@ -46,6 +47,8 @@ public async Task<IActionResult> AnalyzeContract([FromForm] IFormFile file)

var analysis = await _contractAnalysisService.AnalyzeDocument(file, "prebuilt-contract");
var result = _contractAnalysisService.FormatContractAnalysis(analysis);

// await SaveJsonFile(contractResult: result);
return Ok(result);
}

Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
using CfContractAnalysisMvp.Api.Models.Document;

namespace CfContractAnalysisMvp.Api.Interfaces;

/// <summary>
/// Contract for a service that looks for key terms in a formatted document analysis.
/// </summary>
public interface IKeyTermService
{
    /// <summary>
    /// Searches <paramref name="analysisResult"/> for key terms. Nothing is returned, so any
    /// findings are reported by updating <paramref name="analysisResult"/> itself.
    /// </summary>
    /// <param name="analysisResult">The analysis result to inspect and update.</param>
    void FindKeyTerms(DocumentAnalysisResult analysisResult);
}
Original file line number Diff line number Diff line change
Expand Up @@ -4,23 +4,23 @@ public class DocumentAnalysisResult
{
public DocumentAnalysisResult()
{
BuyerName = string.Empty;
SellerName = string.Empty;
PropertyAddress = string.Empty;
ContractAmount = string.Empty;
ContractDate = string.Empty;
BuyerName = new List<DocumentKeyValuePair>();
SellerName = new List<DocumentKeyValuePair>();
PropertyAddress = new List<DocumentKeyValuePair>();
ContractAmount = new List<DocumentKeyValuePair>();
ContractDate = new List<DocumentKeyValuePair>();
KeyValuePairsList = new List<DocumentKeyValuePair>();
}

public string BuyerName { get; set; }
public List<DocumentKeyValuePair> BuyerName { get; set; }

public string SellerName { get; set; }
public List<DocumentKeyValuePair> SellerName { get; set; }

public string PropertyAddress { get; set; }
public List<DocumentKeyValuePair> PropertyAddress { get; set; }

public string ContractAmount { get; set; }
public List<DocumentKeyValuePair> ContractAmount { get; set; }

public string ContractDate { get; set; }
public List<DocumentKeyValuePair> ContractDate { get; set; }

public List<DocumentKeyValuePair> KeyValuePairsList { get; set; }
}
1 change: 1 addition & 0 deletions backend/src/sites/CfContractAnalysisMvp.Api/Program.cs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@

// Add services to the container.
builder.Services.AddTransient<IContractAnalysisService, ContractAnalysisService>();
builder.Services.AddTransient<IKeyTermService, KeyTermService>();

builder.Services.AddControllers();
// Learn more about configuring Swagger/OpenAPI at https://aka.ms/aspnetcore/swashbuckle
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -10,10 +10,12 @@ namespace CfContractAnalysisMvp.Api.Services;
public class ContractAnalysisService : IContractAnalysisService
{
private readonly IConfiguration _configuration;
private readonly IKeyTermService _keyTermService;

public ContractAnalysisService(IConfiguration configuration)
public ContractAnalysisService(IConfiguration configuration, IKeyTermService keyTermService)
{
_configuration = configuration;
_keyTermService = keyTermService;
}

public async Task<AnalyzeResult?> AnalyzeDocument(IFormFile file, string model)
Expand All @@ -39,13 +41,14 @@ public DocumentAnalysisResult FormatDocumentAnalysis(AnalyzeResult? analyzeResul
var analysisResult = new DocumentAnalysisResult();
foreach (DocumentKeyValuePair kvp in analyzeResult.KeyValuePairs)
{
IdentifyKeyTerm(kvp, analysisResult);
analysisResult.KeyValuePairsList.Add(new Models.Document.DocumentKeyValuePair()
{
Key = kvp.Key.Content,
Value = kvp.Value?.Content ?? "No Value"
});
}

_keyTermService.FindKeyTerms(analysisResult);

return analysisResult;
}
Expand All @@ -59,7 +62,6 @@ public ContractAnalysisResult FormatContractAnalysis(AnalyzeResult? analyzeResul
{
foreach (var fieldKeyValuePair in doc.Fields)
{

ContractFieldResult x = CreateFieldResults(fieldKeyValuePair.Key, fieldKeyValuePair.Value);
formattedResult.Results.Add(x);
}
Expand All @@ -68,62 +70,6 @@ public ContractAnalysisResult FormatContractAnalysis(AnalyzeResult? analyzeResul
return formattedResult;
}

/// <summary>
/// Copies the value of <paramref name="keyValuePair"/> into the key-term property of
/// <paramref name="analysisResult"/> that belongs to the first search term found
/// (case-insensitively) inside the pair's key.
/// </summary>
/// <remarks>
/// Only the first matching term is considered. The target property is overwritten when it is
/// still blank, or when the key is exactly the search term. A pair without a value never
/// changes the result.
/// </remarks>
/// <param name="keyValuePair">The extracted key/value pair to classify.</param>
/// <param name="analysisResult">The result object whose key-term properties are updated.</param>
private void IdentifyKeyTerm(DocumentKeyValuePair keyValuePair, DocumentAnalysisResult analysisResult)
{
    string[] keyTerms = { "buyer", "seller", "address", "amount", "date" };
    var keyText = keyValuePair.Key.Content;

    foreach (var term in keyTerms)
    {
        if (keyText.IndexOf(term, StringComparison.OrdinalIgnoreCase) < 0)
        {
            continue;
        }

        // The first matching term decides the target; without a value there is nothing to store.
        var valueElement = keyValuePair.Value;
        if (valueElement is null)
        {
            return;
        }

        // A key that is exactly the term wins over a value stored from a looser match.
        var isExactKey = keyText.Equals(term, StringComparison.OrdinalIgnoreCase);

        switch (term)
        {
            case "buyer":
                if (isExactKey || string.IsNullOrWhiteSpace(analysisResult.BuyerName))
                {
                    analysisResult.BuyerName = valueElement.Content;
                }

                break;

            case "seller":
                if (isExactKey || string.IsNullOrWhiteSpace(analysisResult.SellerName))
                {
                    analysisResult.SellerName = valueElement.Content;
                }

                break;

            case "address":
                if (isExactKey || string.IsNullOrWhiteSpace(analysisResult.PropertyAddress))
                {
                    analysisResult.PropertyAddress = valueElement.Content;
                }

                break;

            case "amount":
                if (isExactKey || string.IsNullOrWhiteSpace(analysisResult.ContractAmount))
                {
                    analysisResult.ContractAmount = valueElement.Content;
                }

                break;

            case "date":
                // Date values belong in ContractDate; writing them to ContractAmount would
                // leave the date empty and clobber a previously stored amount.
                if (isExactKey || string.IsNullOrWhiteSpace(analysisResult.ContractDate))
                {
                    analysisResult.ContractDate = valueElement.Content;
                }

                break;
        }

        return;
    }
}

private ContractFieldResult CreateFieldResults(string documentKey, DocumentField documentField)
{
switch (documentField.FieldType)
Expand Down
100 changes: 100 additions & 0 deletions backend/src/sites/CfContractAnalysisMvp.Api/Services/KeyTermService.cs
Original file line number Diff line number Diff line change
@@ -0,0 +1,100 @@
using System.Text.RegularExpressions;
using CfContractAnalysisMvp.Api.Interfaces;
using CfContractAnalysisMvp.Api.Models.Document;

namespace CfContractAnalysisMvp.Api.Services;

/// <summary>
/// Picks out the key/value pairs of a document analysis whose keys approximately match a fixed
/// set of contract terms (buyer, seller, address, amount, date) and files them under the
/// corresponding list of the <see cref="DocumentAnalysisResult"/>.
/// </summary>
public class KeyTermService : IKeyTermService
{
    // Largest edit distance between a search term and a cleaned key that still counts as a match.
    // NOTE(review): the threshold is absolute rather than scaled to the term length, so a short
    // term such as "date" also matches unrelated short keys ("name" is only 2 edits away).
    // Confirm that this looseness is intended.
    private const int MaxEditDistance = 3;

    // Built once instead of on every comparison; strips everything that is not an ASCII letter.
    private static readonly Regex NonLetters = new("[^A-Za-z]", RegexOptions.Compiled);

    // Search terms paired with the result list they feed.
    private static readonly (string Term, KeyTermKind Kind)[] SearchTerms =
    {
        ("buyer", KeyTermKind.Buyer),
        ("seller", KeyTermKind.Seller),
        ("address", KeyTermKind.Address),
        ("amount", KeyTermKind.Amount),
        ("date", KeyTermKind.Date),
    };

    // Identifies which list of the analysis result a matched pair is added to.
    private enum KeyTermKind
    {
        Buyer,
        Seller,
        Address,
        Amount,
        Date,
    }

    /// <summary>
    /// Compares every entry of <c>KeyValuePairsList</c> against the search terms and adds each
    /// approximate match to the matching key-term list of <paramref name="analysisResult"/>.
    /// </summary>
    /// <remarks>
    /// Entries whose value is <c>":selected:"</c>, <c>":unselected:"</c> or <c>"No Value"</c> are
    /// ignored, as are entries without a key. A single entry may be added to more than one list
    /// when its key is close to several terms.
    /// </remarks>
    /// <param name="analysisResult">The analysis result to scan and update in place.</param>
    /// <exception cref="ArgumentNullException"><paramref name="analysisResult"/> is null.</exception>
    public void FindKeyTerms(DocumentAnalysisResult analysisResult)
    {
        ArgumentNullException.ThrowIfNull(analysisResult);

        foreach (var kvp in analysisResult.KeyValuePairsList)
        {
            // Placeholder values carry no usable content, whatever the key says.
            if (kvp.Value is ":selected:" or ":unselected:" or "No Value")
            {
                continue;
            }

            if (string.IsNullOrEmpty(kvp.Key))
            {
                continue;
            }

            // Normalise the key once per entry. Invariant lower-casing keeps the comparison
            // independent of the server culture (e.g. Turkish dotless i).
            var cleanedKey = NonLetters.Replace(kvp.Key, string.Empty).ToLowerInvariant();

            foreach (var (term, kind) in SearchTerms)
            {
                // Keys shorter than the term are never considered a match.
                if (cleanedKey.Length < term.Length)
                {
                    continue;
                }

                if (ComputeLevenshteinDistance(term, cleanedKey) <= MaxEditDistance)
                {
                    SaveKeyValuePair(analysisResult, kvp, kind);
                }
            }
        }
    }

    /// <summary>
    /// Computes the Levenshtein edit distance between <paramref name="s"/> and
    /// <paramref name="t"/>: the minimum number of single-character insertions, deletions and
    /// substitutions needed to turn one string into the other.
    /// </summary>
    private static int ComputeLevenshteinDistance(string s, string t)
    {
        var n = s.Length;
        var m = t.Length;

        if (n == 0)
        {
            return m;
        }

        if (m == 0)
        {
            return n;
        }

        // d[i, j] holds the distance between the first i chars of s and the first j chars of t.
        var d = new int[n + 1, m + 1];

        for (var i = 0; i <= n; i++)
        {
            d[i, 0] = i;
        }

        for (var j = 0; j <= m; j++)
        {
            d[0, j] = j;
        }

        for (var i = 1; i <= n; i++)
        {
            for (var j = 1; j <= m; j++)
            {
                var cost = (t[j - 1] == s[i - 1]) ? 0 : 1;
                d[i, j] = Math.Min(
                    Math.Min(d[i - 1, j] + 1, d[i, j - 1] + 1),
                    d[i - 1, j - 1] + cost);
            }
        }

        return d[n, m];
    }

    /// <summary>
    /// Adds <paramref name="kvp"/> to the list of <paramref name="analysisResult"/> selected by
    /// <paramref name="kind"/>. Amount candidates are only kept when their value contains '$'.
    /// </summary>
    private static void SaveKeyValuePair(DocumentAnalysisResult analysisResult, DocumentKeyValuePair kvp, KeyTermKind kind)
    {
        switch (kind)
        {
            case KeyTermKind.Buyer:
                analysisResult.BuyerName.Add(kvp);
                break;
            case KeyTermKind.Seller:
                analysisResult.SellerName.Add(kvp);
                break;
            case KeyTermKind.Address:
                analysisResult.PropertyAddress.Add(kvp);
                break;
            case KeyTermKind.Amount:
                // Null-safe: an entry without a value simply is not an amount.
                if (kvp.Value?.Contains('$') == true)
                {
                    analysisResult.ContractAmount.Add(kvp);
                }

                break;
            case KeyTermKind.Date:
                analysisResult.ContractDate.Add(kvp);
                break;
        }
    }
}
1 change: 0 additions & 1 deletion backend/src/sites/CfContractAnalysisMvp.Api/sample.json

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,6 @@
using System.Threading.Tasks;
using Azure.AI.FormRecognizer.DocumentAnalysis;
using CfContractAnalysisMvp.Api.Interfaces;
using CfContractAnalysisMvp.Api.Models.Contract;
using CfContractAnalysisMvp.Api.Services;
using Microsoft.Extensions.Configuration;
using NSubstitute;
Expand All @@ -14,35 +13,22 @@ namespace CfContractAnalysisMvp.UnitTests.Services;
public class CfContractAnalysisMvpTests
{
private readonly IConfiguration _configuration;
private readonly IKeyTermService _keyTermService;

/// <summary>
/// Creates NSubstitute stand-ins for the configuration and key-term dependencies.
/// </summary>
public CfContractAnalysisMvpTests()
{
    _keyTermService = Substitute.For<IKeyTermService>();
    _configuration = Substitute.For<IConfiguration>();
}

private IContractAnalysisService Sut => new ContractAnalysisService(_configuration);
private IContractAnalysisService Sut => new ContractAnalysisService(_configuration, _keyTermService);

/// <summary>
/// Reads the JSON fixture at a path relative to the working directory and deserializes it
/// into an <c>AnalyzeResult</c>.
/// </summary>
// NOTE(review): this test contains no assertions and never calls the service under test, so it
// can only fail if reading or deserializing the fixture throws.
[Fact]
public async Task Should_fail_null_values()
{
// Relative to the test working directory; the fixture must exist there for the read to succeed.
string fileName = "../../../Files/testResult.json";
string jsonString = await File.ReadAllTextAsync(fileName);
// The null-forgiving operator suppresses the nullable warning on the Deserialize result.
AnalyzeResult testResult = JsonSerializer.Deserialize<AnalyzeResult>(jsonString)!;

// var formattedResult = new ContractAnalysisResult();
// foreach (var doc in testResult.Documents)
// {
// foreach (var fieldKeyValuePair in doc.Fields)
// {
// // CreateFieldResults(fieldKeyValuePair, formattedResult.Results);
// ContractFieldResult x = Sut.CreateFieldResultsTest(fieldKeyValuePair.Key, fieldKeyValuePair.Value);
// }
// }

// '/Users/kaden/dev/cf/cf-contract-analysis-mvp/backend/tests/CfContractAnalysisMvp.UnitTests/bin/Debug/Files/testResult.json'.

// var result = await Sut.AnalyzeDocument(null);
// result.ShouldBeEquivalentTo(new List<DocumentKeyValuePair>() );
}

// [Theory]
Expand Down

0 comments on commit 975b7db

Please sign in to comment.