Skip to content
Docs

Regulatory Policy Search Engine

Financial services compliance teams need to search across thousands of regulatory policies to ensure compliance with constantly changing regulations. The stakes are high: a missed policy can result in regulatory fines, audit failures, and reputational damage. Traditional keyword search achieves only 50-60% relevance because regulatory language is dense and varied — the same concept (“customer due diligence”) appears under different terms across jurisdictions (“KYC requirements,” “enhanced verification procedures,” “client identification protocols”), and keyword search misses these semantic relationships.

Hybrid semantic search combines vector and keyword retrieval to achieve 90%+ relevance with comprehensive policy discovery. Hybrid search is essential here because regulatory searches have both semantic intent (find policies about customer verification) and exact-term requirements (find policies mentioning “Section 314(b)” or “BSA/AML”). Neither vector search nor keyword search alone satisfies both needs.

Beluga AI’s retriever package enables hybrid search combining semantic and keyword retrieval. The system analyzes query intent, searches using both methods, merges results with regulatory relationship context, and ranks by relevance.

┌──────────────┐ ┌──────────────┐ ┌──────────────┐
│ Regulatory │───▶│ Query │───▶│ Hybrid │
│ Query │ │ Analyzer │ │ Retriever │
└──────────────┘ └──────────────┘ └──────┬───────┘
┌──────────────────┴────────────────┐
▼ ▼
┌──────────────┐ ┌──────────────┐
│ Semantic │ │ Keyword │
│ Retriever │ │ Retriever │
└──────┬───────┘ └──────┬───────┘
│ │
└──────────────┬───────────────────┘
┌──────────────┐
│ Result │
│ Merger │
└──────┬───────┘
┌──────────────┐
│ Relationship │
│ Enhancer │
└──────┬───────┘
┌──────────────┐
│ Ranked │
│ Results │
└──────────────┘

The regulatory search engine combines semantic and keyword retrieval with relationship enhancement:

package main
import (
"context"
"fmt"
"sort"
"github.com/lookatitude/beluga-ai/rag/retriever"
"github.com/lookatitude/beluga-ai/schema"
_ "github.com/lookatitude/beluga-ai/rag/retriever/providers/hybrid"
_ "github.com/lookatitude/beluga-ai/rag/retriever/providers/vector"
)
// RegulatorySearchEngine searches regulatory policies using hybrid retrieval.
type RegulatorySearchEngine struct {
retriever retriever.Retriever
relationshipGraph *RelationshipGraph
}
func NewRegulatorySearchEngine(ctx context.Context) (*RegulatorySearchEngine, error) {
// Hybrid retriever combines vector and keyword search
ret, err := retriever.New("hybrid", retriever.Config{
VectorWeight: 0.7,
KeywordWeight: 0.3,
})
if err != nil {
return nil, fmt.Errorf("create retriever: %w", err)
}
return &RegulatorySearchEngine{
retriever: ret,
relationshipGraph: NewRelationshipGraph(),
}, nil
}
// Search searches for regulatory policies using hybrid retrieval.
func (r *RegulatorySearchEngine) Search(
ctx context.Context,
query string,
filters SearchFilters,
) ([]PolicyResult, error) {
// Retrieve using hybrid search
docs, err := r.retriever.Retrieve(ctx, query,
retriever.WithTopK(20),
retriever.WithThreshold(0.7),
)
if err != nil {
return nil, fmt.Errorf("retrieve documents: %w", err)
}
// Enhance with regulatory relationships
enhanced := r.enhanceWithRelationships(ctx, docs)
// Filter by regulation type if specified
if filters.RegulationType != "" {
enhanced = r.filterByType(enhanced, filters.RegulationType)
}
// Rank results by relevance and recency
ranked := r.rankResults(enhanced, query)
return ranked, nil
}
func (r *RegulatorySearchEngine) enhanceWithRelationships(
ctx context.Context,
docs []schema.Document,
) []PolicyResult {
results := make([]PolicyResult, len(docs))
for i, doc := range docs {
policyID := doc.Metadata["policy_id"].(string)
// Get related policies from relationship graph
related := r.relationshipGraph.GetRelatedPolicies(ctx, policyID)
results[i] = PolicyResult{
Policy: doc,
Related: related,
Relevance: getRelevanceScore(doc),
}
}
return results
}
func (r *RegulatorySearchEngine) filterByType(
results []PolicyResult,
regulationType string,
) []PolicyResult {
var filtered []PolicyResult
for _, result := range results {
if result.Policy.Metadata["regulation_type"] == regulationType {
filtered = append(filtered, result)
}
}
return filtered
}
func (r *RegulatorySearchEngine) rankResults(
results []PolicyResult,
query string,
) []PolicyResult {
// Sort by relevance score (descending)
sort.Slice(results, func(i, j int) bool {
return results[i].Relevance > results[j].Relevance
})
return results
}
func getRelevanceScore(doc schema.Document) float64 {
if score, ok := doc.Metadata["relevance_score"].(float64); ok {
return score
}
return 0.0
}
type SearchFilters struct {
RegulationType string
DateFrom string
DateTo string
}
type PolicyResult struct {
Policy schema.Document
Related []RelatedPolicy
Relevance float64
}
type RelatedPolicy struct {
PolicyID string
Relationship string
Title string
}

Build a graph of policy relationships to enhance search results:

type RelationshipGraph struct {
relationships map[string][]RelatedPolicy // policyID -> related policies
}
func NewRelationshipGraph() *RelationshipGraph {
return &RelationshipGraph{
relationships: make(map[string][]RelatedPolicy),
}
}
func (g *RelationshipGraph) GetRelatedPolicies(
ctx context.Context,
policyID string,
) []RelatedPolicy {
return g.relationships[policyID]
}
func (g *RelationshipGraph) AddRelationship(
policyID string,
related RelatedPolicy,
) {
g.relationships[policyID] = append(g.relationships[policyID], related)
}
func (g *RelationshipGraph) BuildFromPolicies(ctx context.Context, policies []schema.Document) error {
// Build relationships based on citations, amendments, and topic overlap
for _, policy := range policies {
policyID := policy.Metadata["policy_id"].(string)
// Find cited policies
if citations, ok := policy.Metadata["citations"].([]string); ok {
for _, citedID := range citations {
g.AddRelationship(policyID, RelatedPolicy{
PolicyID: citedID,
Relationship: "cites",
})
}
}
// Find amended policies
if amends, ok := policy.Metadata["amends"].(string); ok {
g.AddRelationship(policyID, RelatedPolicy{
PolicyID: amends,
Relationship: "amends",
})
}
}
return nil
}

Track and index new regulatory updates in real-time:

type PolicyUpdateTracker struct {
engine *RegulatorySearchEngine
updateChan chan PolicyUpdate
}
func NewPolicyUpdateTracker(engine *RegulatorySearchEngine) *PolicyUpdateTracker {
return &PolicyUpdateTracker{
engine: engine,
updateChan: make(chan PolicyUpdate, 100),
}
}
func (t *PolicyUpdateTracker) Start(ctx context.Context) error {
for {
select {
case <-ctx.Done():
return ctx.Err()
case update := <-t.updateChan:
if err := t.processUpdate(ctx, update); err != nil {
// Log error but continue processing
continue
}
}
}
}
func (t *PolicyUpdateTracker) processUpdate(ctx context.Context, update PolicyUpdate) error {
// Index the new or updated policy
doc := schema.Document{
Content: update.Content,
Metadata: map[string]any{
"policy_id": update.PolicyID,
"regulation_type": update.RegulationType,
"effective_date": update.EffectiveDate,
},
}
// Add to vector store for semantic search
// Implementation depends on vector store provider
// Update relationship graph
t.engine.relationshipGraph.AddRelationship(update.PolicyID, RelatedPolicy{
PolicyID: update.AmendedPolicyID,
Relationship: "amends",
})
return nil
}
type PolicyUpdate struct {
PolicyID string
Content string
RegulationType string
EffectiveDate string
AmendedPolicyID string
}

Implement score-based merging for hybrid results:

func (r *RegulatorySearchEngine) mergeResults(
vectorResults []schema.Document,
keywordResults []schema.Document,
vectorWeight float64,
keywordWeight float64,
) []schema.Document {
scoreMap := make(map[string]float64)
// Combine scores from both retrievers
for _, doc := range vectorResults {
id := doc.Metadata["policy_id"].(string)
score := getRelevanceScore(doc)
scoreMap[id] = score * vectorWeight
}
for _, doc := range keywordResults {
id := doc.Metadata["policy_id"].(string)
score := getRelevanceScore(doc)
scoreMap[id] += score * keywordWeight
}
// Deduplicate and sort by combined score
var merged []schema.Document
seen := make(map[string]bool)
allDocs := append(vectorResults, keywordResults...)
for _, doc := range allDocs {
id := doc.Metadata["policy_id"].(string)
if !seen[id] {
doc.Metadata["relevance_score"] = scoreMap[id]
merged = append(merged, doc)
seen[id] = true
}
}
sort.Slice(merged, func(i, j int) bool {
return getRelevanceScore(merged[i]) > getRelevanceScore(merged[j])
})
return merged
}

Track search metrics to monitor performance:

import (
"go.opentelemetry.io/otel"
"go.opentelemetry.io/otel/attribute"
)
func (r *RegulatorySearchEngine) SearchWithMonitoring(
ctx context.Context,
query string,
filters SearchFilters,
) ([]PolicyResult, error) {
tracer := otel.Tracer("regulatory-search")
ctx, span := tracer.Start(ctx, "search.regulatory")
defer span.End()
span.SetAttributes(
attribute.String("query", query),
attribute.String("regulation_type", filters.RegulationType),
)
results, err := r.Search(ctx, query, filters)
if err != nil {
span.RecordError(err)
return nil, err
}
span.SetAttributes(
attribute.Int("results_count", len(results)),
)
return results, nil
}

Regulatory policy search delivered significant improvements:

MetricBeforeAfterImprovement
Search Relevance (%)50-609253-84%
Policy Discovery Rate (%)70-809620-37%
Manual Review Time (hours/week)8-102.575-81% reduction
Compliance Violations/Year3-50100% reduction
Satisfaction Score5.5/109.1/1065%