This is the deep end. We're covering NLP pipelines, genre-aware scoring, Rust microservices, and the exact formulas Nova uses to assess content quality.
For the user-facing explanation, see How Nova's Quality Algorithm Works.
Nova's NLP engine is a native Rust library compiled to Node.js bindings via NAPI-RS, providing genre-aware structural and semantic quality assessment. The library architecture delivers:
Key Features:
Architecture Flow:
The Rust library processes text through five parallel analyzers, each optimized for specific linguistic features:
1. Lexical Analysis (Custom Implementation)
2. Syntactic Analysis (Pattern Matching)
3. Discourse Analysis (Regex + Heuristics)
4. Semantic Analysis (BERT Embeddings)
5. Grammar Analysis (nlprule + Custom Rules)
Moving-Average Type-Token Ratio (MATTR) with Fiction Optimization
The lexical analyzer uses an enhanced MATTR implementation that adjusts for creative writing patterns:
/// Scores vocabulary richness in `[0.0, 1.0]` using a moving-average
/// type-token ratio (MATTR) tuned for fiction.
///
/// The type-token ratio (distinct words / total words) is computed for every
/// window of up to 100 consecutive words and the results are averaged. The
/// mean is then rescaled so that 0.75 maps to a perfect 1.0: lower values
/// scale linearly (`mattr / 0.75`), higher values lose half of their
/// overshoot.
///
/// Special cases:
/// * empty input returns `0.0`;
/// * fewer than 10 words returns the plain, unadjusted TTR of the whole text.
fn calculate_vocabulary_richness(words: &[String]) -> f64 {
    const MAX_WINDOW: usize = 100;
    const MIN_WINDOW: usize = 10;
    const OPTIMAL_MATTR: f64 = 0.75;
    const OVERSHOOT_PENALTY: f64 = 0.5;

    if words.is_empty() {
        return 0.0;
    }

    // Call `min` on the typed length: a bare `100.min(..)` literal has no
    // concrete integer type for the method to resolve on.
    let window_size = words.len().min(MAX_WINDOW);
    let window_len = window_size as f64;

    // Word counts for the current window. Borrowing `&str` avoids cloning
    // every word, and updating the counts incrementally keeps the whole scan
    // linear instead of rebuilding a set per window.
    let mut counts: std::collections::HashMap<&str, usize> =
        std::collections::HashMap::with_capacity(window_size);
    for word in &words[..window_size] {
        *counts.entry(word.as_str()).or_insert(0) += 1;
    }
    let first_ttr = counts.len() as f64 / window_len;

    // Too short for a meaningful moving average: the first window is the
    // whole text, so this is its raw TTR.
    if window_size < MIN_WINDOW {
        return first_ttr;
    }

    let mut ttr_sum = first_ttr;
    let mut window_count = 1_usize;

    // Slide one word at a time: `entering` joins the window, `leaving` drops out.
    for (leaving, entering) in words.iter().zip(&words[window_size..]) {
        *counts.entry(entering.as_str()).or_insert(0) += 1;

        let remaining = counts.get_mut(leaving.as_str()).map(|n| {
            *n -= 1;
            *n
        });
        if remaining == Some(0) {
            counts.remove(leaving.as_str());
        }

        ttr_sum += counts.len() as f64 / window_len;
        window_count += 1;
    }

    let mattr = ttr_sum / window_count as f64;

    // Fiction typically sits at MATTR 0.7-0.85, so 0.75 is treated as optimal.
    let adjusted = if mattr < OPTIMAL_MATTR {
        mattr / OPTIMAL_MATTR
    } else {
        1.0 - (mattr - OPTIMAL_MATTR) * OVERSHOOT_PENALTY
    };

    adjusted.clamp(0.0, 1.0)
}
39
40fn calculate_ttr(words: &[String]) -> f64 {
41 if words.is_empty() {
42 return 0.0;
43 }
44
45 let unique: HashSet<String> = words.iter().cloned().collect();
46 unique.len() as f64 / words.len() as f64
47}Why MATTR over raw TTR? Raw type-token ratio penalizes longer works. MATTR maintains consistency across text lengths, which is essential for comparing a 2k-word short story to a 50k-word novel.
N-gram Repetition Detection (Fiction-Weighted):
Nova analyzes repetitive patterns at multiple n-gram levels, with heavier penalties for repeated phrases:
1fn calculate_repetition_score(words: &[String]) -> f64 {
2 if words.len() < 10 {
3 return 1.0; // Perfect score for short texts
4 }
5
6 // Count n-gram collisions with fiction-aware weighting
7 let bigrams = extract_ngrams(words, 2);
8 let trigrams = extract_ngrams(words, 3);
9 let fourgrams = extract_ngrams(words, 4);
10
11 let bigram_repetition = calculate_ngram_repetition(&bigrams);
12 let trigram_repetition = calculate_ngram_repetition(&trigrams);
13 let fourgram_repetition = calculate_ngram_repetition(&fourgrams);
14
15 // Weighted average: longer n-grams matter more in fiction
16 // Repeated phrases are worse than repeated word pairs
17 let weighted_repetition =
18 (bigram_repetition * 0.2) + (trigram_repetition * 0.4) + (fourgram_repetition * 0.4);
19
20 // Invert (higher score = less repetition)
21 1.0 - weighted_repetition.min(1.0)
22}
23
24fn calculate_ngram_repetition(ngrams: &[String]) -> f64 {
25 if ngrams.is_empty() {
26 return 0.0;
27 }
28
29 let mut freq: HashMap<String, usize> = HashMap::new();
30 for ngram in ngrams {
31 *freq.entry(ngram.clone()).or_insert(0) += 1;
32 }
33
34 // Calculate how many ngrams appear more than once
35 let repeated = freq.values().filter(|count| **count > 1).count();
36 repeated as f64 / freq.len() as f64
37}Distribution Analysis
1fn calculate_paragraph_variance(&self, paragraphs: &[String]) -> f32 {
2 if paragraphs.len() < 2 {
3 return 0.0;
4 }
5
6 let lengths: Vec<f32> = paragraphs
7 .iter()
8 .map(|p| p.split_whitespace().count() as f32)
9 .collect();
10
11 let mean = lengths.iter().sum::<f32>() / lengths.len() as f32;
12
13 let variance = lengths.iter()
14 .map(|&len| (len - mean).powi(2))
15 .sum::<f32>() / lengths.len() as f32;
16
17 variance.sqrt() // Return standard deviation
18}
19
20fn score_paragraph_variance(&self, variance: f32) -> f32 {
21 // Optimal variance: 15-200 words
22 // Too low = monotonous, too high = erratic
23 self.score_against_range(variance, 15.0, 200.0)
24}Pacing Score via Sentence Length Windows:
1fn calculate_pacing_score(&self, text: &str, _paragraphs: &[String]) -> f32 {
2 let sentences = self.extract_all_sentences(text);
3
4 if sentences.len() < 10 {
5 return 0.5; // Not enough data
6 }
7
8 let window_size = 10;
9 let mut variances = Vec::new();
10
11 for i in 0..=(sentences.len().saturating_sub(window_size)) {
12 let window = &sentences[i..i + window_size];
13 let variance = self.calculate_sentence_length_variance(window);
14 variances.push(variance);
15 }
16
17 if variances.is_empty() {
18 return 0.5;
19 }
20
21 let mean_variance = variances.iter().sum::<f32>() / variances.len() as f32;
22
23 // Normalize to 0-1 range (optimal variance around 20-40)
24 (mean_variance / 60.0).min(1.0)
25}Structural Analysis
1fn calculate_dialogue_density(&self, text: &str) -> f32 {
2 static DIALOGUE_REGEX: OnceLock<Regex> = OnceLock::new();
3 let regex = DIALOGUE_REGEX.get_or_init(|| {
4 Regex::new(r#"[""]([^""]+)[""]"#).unwrap()
5 });
6
7 let total_chars = text.chars().count();
8 if total_chars == 0 {
9 return 0.0;
10 }
11
12 let dialogue_chars: usize = regex
13 .find_iter(text)
14 .map(|m| m.as_str().chars().count())
15 .sum();
16
17 dialogue_chars as f32 / total_chars as f32
18}
19
20fn score_dialogue_density(&self, density: f32, genre: &Genre) -> f32 {
21 let (min_target, max_target) = genre.dialogue_density_target();
22
23 // Poetry doesn't use dialogue metrics
24 if min_target == 0.0 && max_target == 0.0 {
25 return 100.0;
26 }
27
28 self.score_against_range(density, min_target, max_target)
29}Genre-Specific Dialogue Targets:
1impl Genre {
2 pub fn dialogue_density_target(&self) -> (f32, f32) {
3 match self {
4 Genre::Romance => (0.35, 0.60),
5 Genre::Thriller => (0.25, 0.45),
6 Genre::Literary => (0.20, 0.40),
7 Genre::Poetry => (0.0, 0.0), // Exempt
8 Genre::YA => (0.30, 0.55),
9 _ => (0.25, 0.50),
10 }
11 }
12}Syntactic Complexity Metrics
1fn calculate_complexity(&self, sentences: &[String]) -> f32 {
2 if sentences.is_empty() {
3 return 0.0;
4 }
5
6 // Heuristic: count subordinate conjunctions and relative pronouns
7 let complexity_indicators = [
8 "although", "because", "while", "whereas", "if", "unless",
9 "who", "which", "that", "whom", "whose",
10 ];
11
12 let mut total_complexity = 0.0;
13
14 for sentence in sentences {
15 let lower = sentence.to_lowercase();
16 let indicator_count = complexity_indicators.iter()
17 .filter(|&&word| lower.contains(word))
18 .count();
19
20 // Normalize by sentence length
21 let word_count = sentence.split_whitespace().count().max(1);
22 total_complexity += indicator_count as f32 / word_count as f32;
23 }
24
25 total_complexity / sentences.len() as f32
26}Passive Voice Detection:
1fn calculate_passive_voice_ratio(&self, sentences: &[String]) -> f32 {
2 if sentences.is_empty() {
3 return 0.0;
4 }
5
6 let be_verbs = ["is", "are", "was", "were", "been", "be", "being"];
7
8 let passive_count = sentences
9 .iter()
10 .filter(|s| self.likely_passive_voice(s, &be_verbs))
11 .count();
12
13 passive_count as f32 / sentences.len() as f32
14}
15
16fn likely_passive_voice(&self, sentence: &str, be_verbs: &[&str]) -> bool {
17 let lower = sentence.to_lowercase();
18 let words: Vec<&str> = lower.split_whitespace().collect();
19
20 for (i, word) in words.iter().enumerate() {
21 if be_verbs.contains(word) {
22 // Check if followed by past participle (heuristic: ends in -ed)
23 if i + 1 < words.len() {
24 let next_word = words[i + 1];
25 if next_word.ends_with("ed") {
26 return true;
27 }
28 }
29 }
30 }
31 false
32}Multi-Tier Caching Architecture
Nova's semantic analysis uses transformer embeddings with an aggressive caching strategy to minimize redundant computation:
1/// Compute sentence embedding with FNV-1a hash-based caching
2/// **TIER 1**: Memory cache (HashMap with model-specific keys)
3/// **TIER 2**: Disk cache (bincode serialization to ~/.cache/nova-nlp-embeddings)
4/// **TIER 3**: BERT forward pass (only on cache miss)
5pub fn compute_sentence_embedding(text: &str) -> Result<Vec<f32>> {
6 let hash = hash_text(text); // FNV-1a hash including model identifier
7
8 // TIER 1: Check in-memory cache
9 if let Some(cache) = EMBEDDING_CACHE.get() {
10 if let Ok(cache) = cache.lock() {
11 if let Some(embedding) = cache.get(&hash) {
12 return Ok(embedding.clone());
13 }
14 }
15 }
16
17 // TIER 2: Check disk cache
18 if let Some(embedding) = load_from_disk_cache(hash) {
19 // Warm up memory cache for next time
20 if let Some(cache) = EMBEDDING_CACHE.get() {
21 if let Ok(mut cache) = cache.lock() {
22 cache.insert(hash, embedding.clone());
23 }
24 }
25 return Ok(embedding);
26 }
27
28 // TIER 3: Compute with BERT
29 let tokenizer = TOKENIZER.get()?;
30 let model = BERT_MODEL.get()?;
31 let device = DEVICE.get()?;
32
33 let encoding = tokenizer.encode(text, false)?;
34 let tokens = encoding.get_ids();
35 let token_type_ids = encoding.get_type_ids();
36
37 let token_ids = Tensor::new(tokens, device)?.unsqueeze(0)?;
38 let token_type_ids = Tensor::new(token_type_ids, device)?.unsqueeze(0)?;
39
40 let embeddings = model.forward(&token_ids, &token_type_ids, None)?;
41 let pooled = embeddings.mean(1)?; // Mean pooling
42 let embedding_vec = pooled.squeeze(0)?.to_vec1::<f32>()?;
43
44 // Save to both caches
45 save_to_cache(&hash, &embedding_vec);
46
47 Ok(embedding_vec)
48}Batch Processing with Rayon Parallelization:
1/// Batch compute embeddings with parallelization
2/// Uses 64-item chunks for optimal throughput on 12-core systems
3pub fn compute_embeddings_batch(texts: &[String]) -> Result<Vec<Vec<f32>>> {
4 if texts.is_empty() {
5 return Ok(vec![]);
6 }
7
8 // Parallel cache lookup and computation
9 let results: Vec<(usize, Result<Vec<f32>>)> = texts
10 .par_iter()
11 .enumerate()
12 .chunks(OPTIMAL_CHUNK_SIZE) // 64 items per chunk
13 .flat_map(|chunk| {
14 chunk.into_par_iter().map(|(i, text)| {
15 (i, compute_sentence_embedding(text))
16 })
17 })
18 .collect();
19
20 // Sort by index to maintain order
21 let mut sorted_results = vec![vec![]; texts.len()];
22 for (i, result) in results {
23 sorted_results[i] = result?;
24 }
25
26 Ok(sorted_results)
27}Coherence Calculation Using BERT Embeddings:
1fn calculate_coherence(sentences: &[String]) -> Result<f64> {
2 if sentences.len() < 2 {
3 return Ok(1.0);
4 }
5
6 // Batch process all embeddings in one forward pass
7 let embeddings = compute_embeddings_batch(sentences)?;
8
9 // Calculate cosine similarity between adjacent sentences
10 let mut coherence_scores = Vec::new();
11 for i in 0..embeddings.len() - 1 {
12 let similarity = cosine_similarity(&embeddings[i], &embeddings[i + 1]);
13 coherence_scores.push(similarity as f64);
14 }
15
16 if coherence_scores.is_empty() {
17 return Ok(0.7);
18 }
19
20 Ok(coherence_scores.iter().sum::<f64>() / coherence_scores.len() as f64)
21}
22
/// Returns the cosine similarity of `a` and `b`, always within `[-1.0, 1.0]`
/// for finite inputs.
///
/// Returns `0.0` when the vectors differ in length or when the product of
/// their magnitudes is zero (including empty vectors).
fn cosine_similarity(a: &[f32], b: &[f32]) -> f32 {
    if a.len() != b.len() {
        return 0.0;
    }

    let dot: f32 = a.iter().zip(b.iter()).map(|(x, y)| x * y).sum();
    let norm_a = a.iter().map(|x| x * x).sum::<f32>().sqrt();
    let norm_b = b.iter().map(|x| x * x).sum::<f32>().sqrt();

    // Test the product rather than each norm: two tiny non-zero norms can
    // still multiply to zero and would otherwise divide by zero.
    let denominator = norm_a * norm_b;
    if denominator == 0.0 {
        return 0.0;
    }

    // Rounding can push the ratio slightly past ±1; callers compute
    // `1.0 - similarity` and expect a non-negative distance.
    (dot / denominator).clamp(-1.0, 1.0)
}
38
39 // Jaccard similarity
40 intersection as f32 / union as f32
41}
Nova supports two transformer models with different speed/quality tradeoffs:
Fast Mode (MiniLM-L6-v2):
Deep Mode (BGE-base-en-v1.5):
1// Initialize with specific model
2import { initialize, AnalysisModel } from '@workspace/nlp-core';
3
4await initialize(AnalysisModel.Fast); // Default: quick analysis
5// OR
6await initialize(AnalysisModel.Deep); // Deep analysis mode (BGE-base)1// Model configuration in Rust
2#[derive(Debug, Clone)]
3struct ModelConfig {
4 model: AnalysisModel,
5 model_name: &'static str,
6 embedding_dim: usize,
7 model_path: &'static str,
8}
9
10impl ModelConfig {
11 fn for_model(model: AnalysisModel) -> Self {
12 match model {
13 AnalysisModel::Fast => ModelConfig {
14 model,
15 model_name: "MiniLM-L6-v2",
16 embedding_dim: 384,
17 model_path: "minilm",
18 },
19 AnalysisModel::Deep => ModelConfig {
20 model,
21 model_name: "BGE-base-en-v1.5",
22 embedding_dim: 768,
23 model_path: "bge-base",
24 },
25 }
26 }
27}1. Narrative Coherence (Entity Tracking + Embedding Similarity)
Combines BERT semantic coherence with named entity consistency:
1fn calculate_narrative_coherence_real(paragraphs: &[String]) -> Result<f64> {
2 // Batch compute all paragraph embeddings
3 let embeddings = compute_embeddings_batch(paragraphs)?;
4
5 // Semantic flow score (adjacent paragraph similarity)
6 let mut flow_scores = Vec::new();
7 for i in 0..embeddings.len() - 1 {
8 let sim = cosine_similarity(&embeddings[i], &embeddings[i + 1]);
9 flow_scores.push(sim as f64);
10 }
11
12 let semantic_flow = if !flow_scores.is_empty() {
13 flow_scores.iter().sum::<f64>() / flow_scores.len() as f64
14 } else {
15 0.7
16 };
17
18 // Entity consistency tracking
19 let mut entity_tracker: HashMap<String, Vec<usize>> = HashMap::new();
20 for (idx, paragraph) in paragraphs.iter().enumerate() {
21 let entities = extract_entities(paragraph);
22 for entity in entities {
23 entity_tracker.entry(entity).or_default().push(idx);
24 }
25 }
26
27 // Calculate entity distribution score
28 let entity_spread: f64 = entity_tracker.values()
29 .map(|appearances| {
30 let span = appearances.last().unwrap() - appearances.first().unwrap();
31 span as f64 / paragraphs.len() as f64
32 })
33 .sum::<f64>() / entity_tracker.len().max(1) as f64;
34
35 // Combine scores (60% semantic, 40% entity)
36 Ok(semantic_flow * 0.6 + entity_spread * 0.4)
37}2. Emotional Impact (Anchor-Based Normalization)
Instead of relying on raw sentiment analysis, we use Semantic Anchors—curated vectors representing "Physical Reaction", "Internal Thought", and "High Emotion". We measure the cosine similarity of each sentence against these anchors and normalize the result.
1fn measure_emotional_impact(text: &str, sentences: &[String], embeddings: &[Vec<f32>]) -> Result<f64> {
2 // 1. Compute similarity to semantic anchors
3 let mut total_phys = 0.0;
4 let mut total_internal = 0.0;
5 let mut total_high = 0.0;
6
7 for emb in embeddings {
8 let (phys, internal, high) = classifier.emotional_strengths(emb);
9 total_phys += phys;
10 total_internal += internal;
11 total_high += high;
12 }
13
14 // 2. Normalize relative to model thresholds
15 // Raw cosine similarity is often low (0.1-0.2), so we scale it
16 // based on the specific model's distribution.
17 let n = embeddings.len() as f64;
18 let norm_phys = ((total_phys / n) * 4.0).min(1.0);
19 let norm_internal = ((total_internal / n) * 4.0).min(1.0);
20 let norm_high = ((total_high / n) * 3.0).min(1.0);
21
22 // 3. Calculate Emotional Variance (Volatility)
23 // Good emotional impact often involves ups and downs, not a flat line.
24 let emotional_variance = calculate_embedding_variance(embeddings);
25
26 // 4. Composite Score
27 // Physical reactions (25%) + Internal thought (30%) + High emotion (15%) + Variance (15%)
28 let score = norm_phys * 0.25
29 + norm_internal * 0.30
30 + norm_high * 0.15
31 + emotional_variance * 0.15;
32
33 Ok(score.min(1.0))
34}3. Character Arc Detection (Trajectory Analysis)
We detect character development by analyzing the semantic trajectory of the text. A flat line in vector space means the character hasn't changed. A curve or shift indicates growth.
1fn detect_character_arc(text: &str, paragraphs: &[String], embeddings: &[Vec<f32>]) -> Result<f64> {
2 // 1. Semantic Shift (Start vs End)
3 // We compare the average embedding of the first third vs the last third.
4 let start_vec = average_embedding(&embeddings[0..third]);
5 let end_vec = average_embedding(&embeddings[last_third..]);
6 let shift = 1.0 - cosine_similarity(&start_vec, &end_vec);
7
8 // 2. Internal Progression
9 // Does the density of "Internal Thought" increase?
10 // (Common in realization/epiphany arcs)
11 let internal_growth = calculate_internal_growth(embeddings);
12
13 // 3. Emotional Variance
14 // A character arc usually implies an emotional journey.
15 let variance = calculate_sentiment_variance(embeddings);
16
17 // Combined Score
18 // Shift (40%) + Progression (30%) + Variance (30%)
19 Ok((shift * 0.4 + internal_growth * 0.3 + variance * 0.3).min(1.0))
20}
4. Reader Engagement Prediction (Multi-Factor)
1fn predict_engagement(text: &str, sentences: &[String], embeddings: &[Vec<f32>]) -> Result<f64> {
2 // Factor 1: Sentence variety (prevents monotony)
3 let lengths: Vec<usize> = sentences.iter()
4 .map(|s| s.split_whitespace().count())
5 .collect();
6
7 let mean_len = lengths.iter().sum::<usize>() as f64 / lengths.len() as f64;
8 let variance = lengths.iter()
9 .map(|&len| (len as f64 - mean_len).powi(2))
10 .sum::<f64>() / lengths.len() as f64;
11
12 let variety_score = (variance.sqrt() / 10.0).min(1.0);
13
14 // Factor 2: Dialogue presence (increases engagement)
15 let dialogue_density = text.matches('"').count() as f64 / text.len() as f64;
16 let dialogue_score = (dialogue_density * 100.0).min(1.0);
17
18 // Factor 3: Semantic coherence (easy to follow)
19 let mut coherence_scores = Vec::new();
20 for i in 0..embeddings.len().saturating_sub(1) {
21 coherence_scores.push(cosine_similarity(&embeddings[i], &embeddings[i + 1]) as f64);
22 }
23 let coherence = coherence_scores.iter().sum::<f64>() / coherence_scores.len().max(1) as f64;
24
25 // Factor 4: Active voice ratio (more engaging)
26 let passive_count = detect_passive_voice(sentences);
27 let active_ratio = 1.0 - (passive_count as f64 / sentences.len() as f64);
28
29 // Weighted combination
30 Ok(variety_score * 0.25
31 + dialogue_score * 0.25
32 + coherence * 0.3
33 + active_ratio * 0.2)
34}Nova includes custom rules beyond standard grammar checking:
Note: These rules will eventually be modularized into a dedicated dataset.
1/// Pre-compute embeddings for semantic pattern detection
2fn init_semantic_patterns() -> Result<()> {
3 // Seed phrases for filter word detection
4 let filter_seeds = vec![
5 "felt like", "seemed to", "appeared to", "looked like",
6 "noticed that", "realized that", "thought that", "wondered if",
7 "could see", "could hear", "started to", "began to",
8 ];
9
10 // Seed phrases for weak verb detection
11 let weak_verb_seeds = vec![
12 "was walking", "was running", "was thinking", "was feeling",
13 "were going", "is being", "are making",
14 ];
15
16 // Compute BERT embeddings for semantic matching
17 let filter_embeddings = compute_embeddings_batch(&filter_seeds)?;
18 let weak_verb_embeddings = compute_embeddings_batch(&weak_verb_seeds)?;
19
20 FILTER_WORD_EMBEDDINGS.set(filter_embeddings)?;
21 WEAK_VERB_EMBEDDINGS.set(weak_verb_embeddings)?;
22
23 Ok(())
24}Genre-Weighted Formula
1pub fn calculate_score(metrics: &StructuralMetrics, genre: &Genre) -> f32 {
2 let mut weights = MetricWeights::default();
3 weights.apply_genre_modifiers(genre);
4
5 // Individual component scores (0-100 scale)
6 let lexical_score = score_lexical_diversity(metrics.lexical_diversity, genre);
7 let syntactic_score = score_syntactic_complexity(
8 metrics.sentence_complexity,
9 metrics.avg_sentence_length,
10 genre
11 );
12 let paragraph_score = self.score_paragraph_variance(metrics.paragraph_variance);
13 let dialogue_score = self.score_dialogue_density(metrics.dialogue_density, genre);
14 let coherence_score = self.score_coherence(metrics.coherence_score);
15 let pacing_score = self.score_pacing(metrics.pacing_score);
16
17 // Passive voice penalty (up to -10 points)
18 let passive_penalty = self.calculate_passive_penalty(metrics.passive_voice_ratio);
19
20 // Weighted sum
21 let base_score = lexical_score * weights.lexical_diversity
22 + syntactic_score * weights.syntactic_complexity
23 + paragraph_score * weights.paragraph_variance
24 + dialogue_score * weights.dialogue_balance
25 + coherence_score * weights.coherence
26 + pacing_score * weights.pacing;
27
28 // Apply penalties and clamp to 0-100
29 (base_score - passive_penalty).max(0.0).min(100.0)
30}Default Metric Weights:
1impl MetricWeights {
2 pub fn default() -> Self {
3 Self {
4 lexical_diversity: 0.25,
5 syntactic_complexity: 0.15,
6 paragraph_variance: 0.20,
7 dialogue_balance: 0.15,
8 coherence: 0.15,
9 pacing: 0.10,
10 }
11 }
12
13 pub fn apply_genre_modifiers(&mut self, genre: &Genre) {
14 match genre {
15 Genre::Thriller => {
16 self.pacing *= 1.3;
17 self.dialogue_balance *= 1.2;
18 self.lexical_diversity *= 0.9;
19 }
20 Genre::Literary => {
21 self.lexical_diversity *= 1.4;
22 self.syntactic_complexity *= 1.2;
23 self.pacing *= 0.8;
24 }
25 Genre::Poetry => {
26 self.lexical_diversity *= 1.5;
27 self.coherence *= 1.3;
28 self.dialogue_balance = 0.0;
29 }
30 _ => {}
31 }
32
33 // Renormalize to sum to 1.0
34 let total = self.lexical_diversity + self.syntactic_complexity
35 + self.paragraph_variance + self.dialogue_balance
36 + self.coherence + self.pacing;
37
38 self.lexical_diversity /= total;
39 self.syntactic_complexity /= total;
40 self.paragraph_variance /= total;
41 self.dialogue_balance /= total;
42 self.coherence /= total;
43 self.pacing /= total;
44 }
45}1fn score_against_range(&self, value: f32, min: f32, max: f32) -> f32 {
2 if value >= min && value <= max {
3 return 100.0; // Perfect score in target range
4 }
5
6 let distance = if value < min {
7 min - value
8 } else {
9 value - max
10 };
11
12 // Sigmoid penalty: steeper drop near edges, gentle far away
13 let penalty = self.sigmoid_penalty(distance, 0.1);
14 (100.0 - penalty).max(0.0)
15}
16
17fn sigmoid_penalty(&self, distance: f32, steepness: f32) -> f32 {
18 100.0 / (1.0 + (-steepness * distance).exp())
19}
20
21fn calculate_passive_penalty(&self, ratio: f32) -> f32 {
22 // Penalty for excessive passive voice
23 if ratio > 0.3 {
24 (ratio - 0.3) * 20.0
25 } else {
26 0.0
27 }
28}Genre-Specific Target Ranges:
1impl Genre {
2 pub fn lexical_diversity_target(&self) -> (f32, f32) {
3 match self {
4 Genre::Literary => (0.65, 0.85),
5 Genre::Thriller => (0.55, 0.75),
6 Genre::Mystery => (0.60, 0.80),
7 Genre::Romance => (0.50, 0.70),
8 Genre::Fantasy => (0.60, 0.80),
9 Genre::SciFi => (0.65, 0.85),
10 Genre::YA => (0.55, 0.75),
11 Genre::Poetry => (0.70, 0.95),
12 _ => (0.60, 0.80),
13 }
14 }
15
16 pub fn sentence_length_target(&self) -> (f32, f32) {
17 match self {
18 Genre::Thriller => (12.0, 18.0),
19 Genre::Literary => (15.0, 25.0),
20 Genre::Poetry => (0.0, 0.0), // Exempt
21 Genre::YA => (10.0, 16.0),
22 _ => (12.0, 20.0),
23 }
24 }
25}Nova implements aggressive caching at multiple levels to minimize computational overhead:
1// Global in-memory cache with model-specific keys
2static EMBEDDING_CACHE: OnceLock<Mutex<HashMap<u64, Vec<f32>>>> = OnceLock::new();
3
4/// FNV-1a hash algorithm (faster than SipHash for cache keys)
5fn hash_text(text: &str) -> u64 {
6 let model_suffix = match current_model() {
7 AnalysisModel::Fast => "F",
8 AnalysisModel::Deep => "D",
9 };
10
11 let mut hash: u64 = 0xcbf29ce484222325; // FNV offset basis
12
13 // Hash the text
14 for byte in text.bytes() {
15 hash ^= byte as u64;
16 hash = hash.wrapping_mul(0x100000001b3); // FNV prime
17 }
18
19 // Mix in model identifier
20 for byte in model_suffix.bytes() {
21 hash ^= byte as u64;
22 hash = hash.wrapping_mul(0x100000001b3);
23 }
24
25 hash
26}Cache Performance:
1/// Save embedding to disk using bincode serialization
2fn save_to_disk_cache(hash: u64, embedding: &[f32]) {
3 let cache_dir = get_cache_dir(); // ~/.cache/nova-nlp-embeddings/
4 let file_path = cache_dir.join(format!("{}.bin", hash));
5
6 if let Ok(encoded) = bincode::serialize(embedding) {
7 let _ = std::fs::write(file_path, encoded);
8 }
9}
10
11/// Load embedding from disk
12fn load_from_disk_cache(hash: u64) -> Option<Vec<f32>> {
13 let cache_dir = get_cache_dir();
14 let file_path = cache_dir.join(format!("{}.bin", hash));
15
16 if let Ok(data) = std::fs::read(file_path) {
17 bincode::deserialize(&data).ok()
18 } else {
19 None
20 }
21}Disk Cache Benefits:
~/.cache/nova-nlp-embeddings/ (follows XDG standards)1pub fn compute_sentence_embedding(text: &str) -> Result<Vec<f32>> {
2 let hash = hash_text(text);
3
4 // TIER 1: Memory cache
5 if let Some(embedding) = check_memory_cache(hash) {
6 return Ok(embedding); // ~10μs
7 }
8
9 // TIER 2: Disk cache
10 if let Some(embedding) = load_from_disk_cache(hash) {
11 warm_memory_cache(hash, &embedding); // Promote to L1
12 return Ok(embedding); // ~500μs
13 }
14
15 // TIER 3: Compute with BERT
16 let embedding = bert_forward_pass(text)?; // ~800ms uncached
17
18 // Save to both caches
19 save_to_memory_cache(hash, &embedding);
20 save_to_disk_cache(hash, &embedding);
21
22 Ok(embedding)
23}
24
25 let client = redis::Client::open(redis_url)?;
26 let manager = ConnectionManager::new(client).await?;
27 Ok(Self { client: manager })
28 }
29
30 pub async fn get(&self, story_id: &str) -> Option<AnalysisResult> {
31 let key = format!("analysis:{}", story_id);
32 let mut conn = self.client.clone();
33
34 match conn.get::<_, String>(&key).await {
35 Ok(cached) => serde_json::from_str(&cached).ok(),
36 Err(_) => None,
37 }
38 }
39
40 pub async fn set(&self, story_id: &str, result: &AnalysisResult) -> Result<(), RedisError> {
41 let key = format!("analysis:{}", story_id);
42 let mut conn = self.client.clone();
43
44 let serialized = serde_json::to_string(result)
45 .map_err(|e| RedisError::from((redis::ErrorKind::IoError, "Serialization error", e.to_string())))?;
46
47 // No expiration - content doesn't change after publishing
48 conn.set(&key, serialized).await
49 }
50}
The service generates actionable diagnostics for authors:
1pub fn generate_diagnostics(
2 &self,
3 metrics: &StructuralMetrics,
4 genre: &Genre,
5) -> Vec<Diagnostic> {
6 let mut diagnostics = Vec::new();
7
8 // Lexical diversity check
9 let (lex_min, lex_max) = genre.lexical_diversity_target();
10 if metrics.lexical_diversity < lex_min {
11 diagnostics.push(Diagnostic {
12 priority: Priority::High,
13 issue: "Low lexical diversity".to_string(),
14 metric_value: metrics.lexical_diversity,
15 genre_target: format!("{:.2}-{:.2}", lex_min, lex_max),
16 suggestion: "Consider using more varied vocabulary and avoiding repetitive word choices.".to_string(),
17 affected_locations: vec![],
18 });
19 }
20
21 // Passive voice check
22 if metrics.passive_voice_ratio > 0.3 {
23 diagnostics.push(Diagnostic {
24 priority: Priority::Medium,
25 issue: "High passive voice usage".to_string(),
26 metric_value: metrics.passive_voice_ratio,
27 genre_target: "< 0.20".to_string(),
28 suggestion: "Convert passive constructions to active voice for stronger, clearer writing.".to_string(),
29 affected_locations: vec![],
30 });
31 }
32
33 // Dialogue density check
34 let (dial_min, dial_max) = genre.dialogue_density_target();
35 if dial_min > 0.0 && metrics.dialogue_density < dial_min {
36 diagnostics.push(Diagnostic {
37 priority: Priority::Low,
38 issue: "Low dialogue density".to_string(),
39 metric_value: metrics.dialogue_density,
40 genre_target: format!("{:.2}-{:.2}", dial_min, dial_max),
41 suggestion: "Consider adding more dialogue to increase reader engagement.".to_string(),
42 affected_locations: vec![],
43 });
44 }
45
46 diagnostics
47}Benchmarks (10,000-word chapter on Intel i5-12600K, 12 cores):
| Model | Download Size | Load Time | Memory Usage |
|---|---|---|---|
| MiniLM-L6-v2 | 90MB | 2.5s | 350MB |
| E5-large-v2 | 1.3GB | 8s | 2.1GB |
| nlprule (English) | 12MB | 0.8s | 45MB |
| Component | Cold Start | Cached | Memory |
|---|---|---|---|
| Lexical Analysis | 25ms | 25ms | 2MB |
| Syntactic Analysis | 180ms | 180ms | 8MB |
| Discourse Analysis | 40ms | 40ms | 4MB |
| Semantic (BERT) | 850ms | 120ms | 180MB |
| Grammar Check | 420ms | 420ms | 35MB |
| Total (Fast Model) | ~1.5s | ~800ms | ~230MB |
| Total (Deep Model) | ~2.1s | ~1.2s | ~550MB |
1// Performance multipliers with multi-tier caching
2Cache Hit Rate by Usage Pattern:
3- First analysis: 0% cached (full BERT forward pass)
4- Editing session: 85% cached (reused sentence embeddings)
5- Re-analysis: 95% cached (only new/modified sentences computed)
6
7Average speedup with caching: 4.2x for typical editing workflows
Optimization Strategies:
1// Device selection priority
2let device = if cfg!(feature = "cuda") {
3 Device::cuda_if_available(0).unwrap_or(Device::Cpu)
4} else if cfg!(feature = "metal") {
5 Device::new_metal(0).unwrap_or(Device::Cpu)
6} else {
7 Device::Cpu
8};
GPU Performance (NVIDIA RTX 3060):
Apple Silicon (M1/M2/M3 with Metal):
Nova's NLP engine is integrated as a native Node.js addon via NAPI-RS, providing zero-copy performance:
1// apps/api/src/index.ts - Startup initialization
2import { initialize, AnalysisModel } from '@workspace/nlp-core';
3
4async function startServer() {
5 console.log('🔧 Initializing NLP engine...');
6
7 // Initialize with Fast model for production (Deep for quality checks)
8 await initialize(AnalysisModel.Fast);
9
10 console.log('✅ NLP engine ready');
11
12 // Start Hono server...
13}1// packages/rpc/src/routes/content.ts
2import {
3 analyzeLexical,
4 analyzeSyntactic,
5 analyzeDiscourse,
6 analyzeSemantic,
7 analyzeSemanticMl,
8 checkGrammar,
9 checkFictionCraft,
10} from '@workspace/nlp-core';
11
12export const analyzeChapterQuality = authenticated.handler(async ({ context, input }) => {
13 const { chapterId } = input;
14
15 // Fetch chapter content from database
16 const chapter = await db.query.chapters.findFirst({
17 where: eq(schema.chapters.id, chapterId),
18 with: {
19 content: true, // Get associated content for genre info
20 },
21 });
22
23 if (!chapter) {
24 throw new Error("Chapter not found");
25 }
26
27 // Fetch latest content from R2/MinIO
28 const contentKey = `${chapter.contentKey}/latest.json.gz`;
29 const editorState = await fetchFromR2(contentKey);
30 const plainText = extractPlainText(editorState); // Convert Lexical JSON to text
31
32 // Run all analyzers in parallel (Rust handles internal caching)
33 const [lexical, syntactic, discourse, semantic, semanticMl, grammar, fictionCraft] =
34 await Promise.all([
35 analyzeLexical(plainText),
36 analyzeSyntactic(plainText),
37 analyzeDiscourse(plainText),
38 analyzeSemantic(plainText),
39 analyzeSemanticMl(plainText),
40 checkGrammar(plainText, chapter.content.primaryGenre),
41 checkFictionCraft(plainText),
42 ]);
43
44 // Combine results into unified quality score
45 const qualityMetrics = {
46 structural: {
47 lexicalDiversity: lexical.lexicalDiversity,
48 vocabularyRichness: lexical.vocabularyRichness,
49 repetitionScore: lexical.repetitionScore,
50 sentenceComplexity: syntactic.sentenceComplexity,
51 pacingScore: discourse.pacingScore,
52 dialogueDensity: discourse.dialogueDensity,
53 },
54 semantic: {
55 coherenceScore: semantic.coherenceScore,
56 narrativeCoherence: semanticMl.narrativeCoherence,
57 emotionalImpact: semanticMl.emotionalImpact,
58 readerEngagementScore: semanticMl.readerEngagementScore,
59 },
60 grammar: {
61 totalIssues: grammar.totalIssues,
62 criticalIssues: grammar.criticalIssues,
63 fictionSpecificIssues: fictionCraft.length,
64 },
65 };
66
67 // Store in database for dashboard
68 await db.insert(schema.chapterAnalysis).values({
69 chapterId: chapter.id,
70 metrics: qualityMetrics,
71 issues: [...grammar.issues, ...fictionCraft],
72 analyzedAt: new Date(),
73 });
74
75 return qualityMetrics;
76});1// apps/api/src/workers/content-analysis.ts
2import { analyzeStyleMl, checkFictionCraft } from '@workspace/nlp-core';
3
/**
 * Worker triggered on content save (debounced via BullMQ).
 *
 * Reads the cached editor state stored under `content:<contentKey>` in Redis,
 * extracts its plain text, runs the style and fiction-craft analyzers, and
 * emits a `content:analysis` event to the `user:<userId>` Socket.io room.
 * When the cache entry is missing the job returns without emitting anything.
 *
 * @param job - Job whose `data` carries `contentKey` and `userId`.
 */
export async function analyzeContentWorker(job: Job) {
  const { contentKey, userId } = job.data;

  // Fetch content from Redis cache (recent edit)
  const cached = await redis.get(`content:${contentKey}`);
  if (!cached) return;

  const text = extractPlainText(JSON.parse(cached));

  // The two analyses do not depend on each other, so run them concurrently
  // instead of paying both latencies back to back on the feedback path.
  const [styleMetrics, fictionIssues] = await Promise.all([
    analyzeStyleMl(text),
    checkFictionCraft(text),
  ]);

  // Send real-time feedback via Socket.io
  io.to(`user:${userId}`).emit('content:analysis', {
    contentKey,
    metrics: styleMetrics,
    issues: fictionIssues.slice(0, 10), // at most the first 10 issues
    timestamp: Date.now(),
  });
}
// Advanced character development analysis using entity extraction and embedding drift:
1/// Detect character development via embedding trajectory
2fn detect_character_arc(text: &str, paragraphs: &[String], embeddings: &[Vec<f32>]) -> Result<f64> {
3 // Extract character mentions
4 let characters = extract_characters(text);
5
6 for character in &characters {
7 let mut char_embeddings = Vec::new();
8
9 // Find paragraphs mentioning this character
10 for (i, paragraph) in paragraphs.iter().enumerate() {
11 if paragraph.contains(character) && i < embeddings.len() {
12 char_embeddings.push(&embeddings[i]);
13 }
14 }
15
16 if char_embeddings.len() < 3 {
17 continue; // Not enough data
18 }
19
20 // Calculate embedding drift (measures character evolution)
21 let mut drift_scores = Vec::new();
22 for i in 0..char_embeddings.len() - 1 {
23 let similarity = cosine_similarity(char_embeddings[i], char_embeddings[i + 1]);
24 drift_scores.push(1.0 - similarity); // Lower similarity = more change
25 }
26
27 // Ideal character arc: gradual drift (not flat, not chaotic)
28 let mean_drift = drift_scores.iter().sum::<f32>() / drift_scores.len() as f32;
29 return Ok(mean_drift.clamp(0.0, 1.0) as f64);
30 }
31
32 Ok(0.5) // Neutral score if no clear character found
33}Semantic similarity search for finding similar published works:
1use qdrant_client::prelude::*;
2use qdrant_client::qdrant::{CreateCollection, Distance, VectorParams};
3
4/// Index chapter embeddings in Qdrant for semantic search
5pub async fn index_chapter_embeddings(
6 chapter_id: &str,
7 paragraphs: &[String],
8) -> Result<()> {
9 let client = QdrantClient::from_url("http://localhost:6334").build()?;
10
11 // Compute embeddings for all paragraphs
12 let embeddings = compute_embeddings_batch(paragraphs)?;
13
14 // Create collection if not exists
15 let collection_name = "nova_chapters";
16 client.create_collection(&CreateCollection {
17 collection_name: collection_name.to_string(),
18 vectors_config: Some(VectorParams {
19 size: 384, // MiniLM dimension
20 distance: Distance::Cosine.into(),
21 ..Default::default()
22 }.into()),
23 ..Default::default()
24 }).await.ok(); // Ignore error if exists
25
26 // Upsert embeddings
27 let points: Vec<PointStruct> = embeddings
28 .into_iter()
29 .enumerate()
30 .map(|(i, embedding)| PointStruct {
31 id: Some(format!("{}:{}", chapter_id, i).into()),
32 vectors: Some(embedding.into()),
33 payload: [
34 ("chapter_id".to_string(), chapter_id.into()),
35 ("paragraph_index".to_string(), (i as i64).into()),
36 ].into_iter().collect(),
37 })
38 .collect();
39
40 client.upsert_points(collection_name, points, None).await?;
41
42 Ok(())
43}
44
45/// Find semantically similar chapters
46pub async fn find_similar_chapters(
47 query_embedding: Vec<f32>,
48 limit: usize,
49) -> Result<Vec<String>> {
50 let client = QdrantClient::from_url("http://localhost:6334").build()?;
51
52 let results = client.search_points(&SearchPoints {
53 collection_name: "nova_chapters".to_string(),
54 vector: query_embedding,
55 limit: limit as u64,
56 with_payload: Some(true.into()),
57 ..Default::default()
58 }).await?;
59
60 let chapter_ids: Vec<String> = results.result
61 .into_iter()
62 .filter_map(|point| {
63 point.payload.get("chapter_id")
64 .and_then(|v| v.as_str())
65 .map(String::from)
66 })
67 .collect();
68
69 Ok(chapter_ids)
70}Analyze only changed content instead of full chapters:
1/// Compute diff between two editor states and analyze only changes
2pub fn analyze_incremental(
3 old_state: &str,
4 new_state: &str,
5) -> Result<IncrementalAnalysis> {
6 // Extract paragraphs from both states
7 let old_paragraphs = extract_paragraphs(old_state);
8 let new_paragraphs = extract_paragraphs(new_state);
9
10 // Find changed paragraphs (simple LCS algorithm)
11 let mut changed_indices = Vec::new();
12 for (i, (old, new)) in old_paragraphs.iter().zip(&new_paragraphs).enumerate() {
13 if old != new {
14 changed_indices.push(i);
15 }
16 }
17
18 // Analyze only changed paragraphs + context (±2 paragraphs)
19 let mut analysis_indices = HashSet::new();
20 for &idx in &changed_indices {
21 for offset in -2..=2 {
22 let target_idx = (idx as isize + offset).max(0) as usize;
23 if target_idx < new_paragraphs.len() {
24 analysis_indices.insert(target_idx);
25 }
26 }
27 }
28
29 // Batch analyze affected paragraphs
30 let affected_paragraphs: Vec<String> = analysis_indices
31 .iter()
32 .map(|&i| new_paragraphs[i].clone())
33 .collect();
34
35 let embeddings = compute_embeddings_batch(&affected_paragraphs)?;
36
37 // Compute local metrics (coherence, pacing)
38 let coherence = calculate_local_coherence(&affected_paragraphs, &embeddings)?;
39
40 Ok(IncrementalAnalysis {
41 changed_paragraph_count: changed_indices.len(),
42 analyzed_paragraph_count: affected_paragraphs.len(),
43 local_coherence: coherence,
44 needs_full_analysis: changed_indices.len() > 10, // Threshold
45 })
46}Identify tonal shifts that may indicate inconsistent character voice:
/// Scores style consistency as the spread of embeddings around their centroid.
///
/// Computes the component-wise mean of `chapter_embeddings`, then the average
/// Euclidean distance of each embedding from that mean. The score is
/// `1 - min(average_distance, 1)`, so tightly clustered embeddings score close
/// to `1.0` and an average distance of `1.0` or more scores `0.0`. (No PCA is
/// involved.)
///
/// Returns `0.0` for an empty slice, since there is nothing to assess. The
/// dimensionality is taken from the first embedding; components beyond it in
/// longer embeddings are ignored.
pub fn analyze_style_consistency(chapter_embeddings: &[Vec<f32>]) -> f64 {
    if chapter_embeddings.is_empty() {
        return 0.0;
    }

    let dim = chapter_embeddings[0].len();
    let count = chapter_embeddings.len();

    // Compute mean embedding
    let mut centroid = vec![0.0_f32; dim];
    for embedding in chapter_embeddings {
        for (acc, &val) in centroid.iter_mut().zip(embedding) {
            *acc += val;
        }
    }
    for acc in &mut centroid {
        *acc /= count as f32;
    }

    // Sum of Euclidean distances from the centroid (small = consistent style)
    let total_distance: f64 = chapter_embeddings
        .iter()
        .map(|embedding| {
            let squared: f32 = embedding
                .iter()
                .zip(&centroid)
                .map(|(&a, &b)| (a - b).powi(2))
                .sum();
            f64::from(squared.sqrt())
        })
        .sum();

    let mean_distance = total_distance / count as f64;

    // Normalize (smaller spread = higher consistency score)
    (1.0 - mean_distance.min(1.0)).max(0.0)
}
// Core Technologies:
Models Used:
Performance:
This architecture enables real-time quality feedback during writing while maintaining state-of-the-art semantic understanding through transformer models.
Nova's NLP engine is fundamentally transparent and open:
For Authors: Use quality metrics as guidance, not gospel. The algorithm identifies patterns but cannot judge artistic intent.
Performance Transparency:
Note: This blog post reflects the implementation as of November 2025. The algorithm evolves based on author feedback and advances in NLP research.
Key Principles:
The goal: reward genuine quality, not gaming sophistication.
Event Pipeline
Per-Paragraph Dwell Time Calculation
/** A single reader interaction recorded while a chapter is being read. */
interface ReaderEvent {
  storyId: string;
  chapterId: string;
  /** Index of the paragraph the event refers to. */
  paragraphIndex: number;
  /** Event time. NOTE(review): unit (ms vs s) is not fixed here — confirm with the producer. */
  timestamp: number;
  /** "view_paragraph" is included because the read-time detector filters on it. */
  eventType:
    | "scroll"
    | "pause"
    | "highlight"
    | "skip"
    | "complete"
    | "view_paragraph";
  sessionId: string;
  /** Vertical scroll offset; read from "scroll" events by the scroll-pattern detector. */
  scrollY?: number;
}
9
/**
 * Sums, per paragraph, the time between consecutive events that stay on the
 * same paragraph.
 *
 * Only gaps where the next event has the same `paragraphIndex` are counted.
 * The totals are returned in the order paragraphs first accumulated time, not
 * indexed by paragraph, and paragraphs without any counted gap are absent.
 *
 * @param session - Events of one reading session, in chronological order.
 * @returns Accumulated dwell time per paragraph (same unit as `timestamp`).
 */
function calculateDwellTimes(session: ReaderEvent[]): number[] {
  const totals = new Map<number, number>();

  for (let idx = 0; idx + 1 < session.length; idx++) {
    const here = session[idx];
    const following = session[idx + 1];

    if (following.paragraphIndex !== here.paragraphIndex) {
      continue;
    }

    const elapsed = following.timestamp - here.timestamp;
    const soFar = totals.get(here.paragraphIndex) ?? 0;
    totals.set(here.paragraphIndex, soFar + elapsed);
  }

  return [...totals.values()];
}
// Drop-Off Detection
/** Result of {@link analyzeDropOff}. */
interface CompletionAnalysis {
  /** Fraction of initial readers that reached each paragraph (index = paragraph). */
  completionCurve: number[];
  /** Paragraph index after which the curve falls the most. */
  dropOffIndex: number;
  /** `1 - completionCurve[dropOffIndex]`: share of readers already gone at that index. */
  dropOffPercentage: number;
}

/**
 * Builds a completion curve from reading sessions and locates the steepest drop.
 *
 * A session counts as having reached every paragraph up to the largest
 * `paragraphIndex` among its events. The curve is normalised by the number of
 * sessions that reached paragraph 0.
 *
 * @param sessions - One event list per reading session.
 * @param totalParagraphs - Number of paragraphs in the chapter.
 * @returns The curve and drop-off location. With no paragraphs or no readers,
 *   the curve is all zeros and both drop-off fields are 0.
 */
function analyzeDropOff(
  sessions: ReaderEvent[][],
  totalParagraphs: number
): CompletionAnalysis {
  const readersReaching: number[] = Array.from(
    { length: totalParagraphs },
    () => 0
  );

  for (const session of sessions) {
    // Plain loop instead of Math.max(...spread): long sessions must not
    // exceed the engine's argument-count limit.
    let furthest = -Infinity;
    for (const event of session) {
      if (event.paragraphIndex > furthest) {
        furthest = event.paragraphIndex;
      }
    }

    const lastCounted = Math.min(furthest, readersReaching.length - 1);
    for (let i = 0; i <= lastCounted; i++) {
      readersReaching[i] += 1;
    }
  }

  const initialReaders = readersReaching[0] ?? 0;
  if (initialReaders === 0) {
    // Nothing to normalise by; dividing would fill the curve with NaN.
    return {
      completionCurve: readersReaching.map(() => 0),
      dropOffIndex: 0,
      dropOffPercentage: 0,
    };
  }

  const completionCurve = readersReaching.map((r) => r / initialReaders);

  // Find steepest drop between neighbouring paragraphs
  let largestDrop = 0;
  let dropOffIndex = 0;
  for (let i = 0; i < completionCurve.length - 1; i++) {
    const drop = completionCurve[i] - completionCurve[i + 1];
    if (drop > largestDrop) {
      largestDrop = drop;
      dropOffIndex = i;
    }
  }

  return {
    completionCurve,
    dropOffIndex,
    dropOffPercentage: 1.0 - completionCurve[dropOffIndex],
  };
}
// Completion Rate
/**
 * Share of readers who finished a chapter, scaled by the chapter's length
 * relative to the platform average and capped at 1.
 *
 * The scale factor `1 + (length - avg) / avg` equals `length / avg`, so
 * chapters longer than average are credited and shorter ones discounted.
 *
 * @param chapterId - Chapter to evaluate.
 * @param platformAverageLengthBias - Currently unused; kept so existing call
 *   sites continue to compile.
 * @returns The weighted completion rate, at most 1; 0 when nobody has started
 *   the chapter.
 */
function calculateCompletionRate(
  chapterId: string,
  platformAverageLengthBias: number = 1
): number {
  const completedReaders = countReadersWhoFinished(chapterId);
  const startedReaders = countReadersWhoStarted(chapterId);

  // No starts means no rate; dividing would yield NaN.
  if (startedReaders <= 0) {
    return 0;
  }
  const completionRate = completedReaders / startedReaders;

  const chapterLength = getChapterLength(chapterId);
  const avgChapterLength = getPlatformAverageChapterLength();

  // Without a usable platform average, fall back to the unweighted rate.
  if (avgChapterLength <= 0) {
    return Math.min(1, completionRate);
  }

  const lengthWeighted =
    completionRate *
    (1 + (chapterLength - avgChapterLength) / avgChapterLength);

  return Math.min(1, lengthWeighted);
}
20
/**
 * Average chapter-to-chapter return rate of a story.
 *
 * For each pair of consecutive chapters the rate is
 * `completions(next) / completions(current)`. Pairs whose current chapter has
 * no completions are skipped because their rate is undefined.
 *
 * @param storyId - Story to evaluate.
 * @returns Mean of the per-pair rates, or 0 when no pair can be evaluated
 *   (for example a story with fewer than two chapters).
 */
function calculateReturnRate(storyId: string): number {
  const chapters = getChapters(storyId);
  const returnRates: number[] = [];

  for (let i = 0; i < chapters.length - 1; i++) {
    const completedCurrent = countCompletion(chapters[i].id);
    if (completedCurrent <= 0) {
      continue;
    }
    const completedNext = countCompletion(chapters[i + 1].id);

    returnRates.push(completedNext / completedCurrent);
  }

  if (returnRates.length === 0) {
    return 0;
  }

  return returnRates.reduce((a, b) => a + b, 0) / returnRates.length;
}
// Binge Coefficient (Session Continuity)
/**
 * Fraction of a user's reading sessions in which three or more chapters were read.
 *
 * @param userId - Reader to evaluate.
 * @returns Value in [0, 1]; 0 when the user has no recorded sessions.
 */
function calculateBingeCoefficient(userId: string): number {
  const sessions = getUserReadingSessions(userId);
  if (sessions.length === 0) {
    return 0; // avoid 0 / 0 = NaN
  }

  const bingeSessions = sessions.filter(
    (s) => s.chaptersRead.length >= 3
  ).length;

  const bingeCoefficient = bingeSessions / sessions.length;

  return Math.min(1, bingeCoefficient);
}
// Time-Weighted Engagement
/**
 * Recency-weighted average of per-event engagement.
 *
 * Each event is weighted by `exp(-daysAgo / 30)`: a 30-day decay constant,
 * i.e. about 37% weight at 30 days and half weight at roughly 21 days.
 *
 * NOTE(review): every event currently contributes the constant value 1, so
 * the weighted average is 1 whenever at least one event has positive weight.
 * A per-event engagement value is presumably intended — confirm.
 *
 * @param events - Engagement events with a `date`.
 * @returns The weighted average, or 0 when there are no events or the total
 *   weight is zero.
 */
function calculateTemporalEngagement(events: EngagementEvent[]): number {
  let weightedEngagement = 0;
  let totalWeight = 0;

  for (const event of events) {
    const daysAgo = getDaysSince(event.date);
    const recencyWeight = Math.exp(-daysAgo / 30);

    weightedEngagement += 1 * recencyWeight;
    totalWeight += recencyWeight;
  }

  // Empty input (or fully decayed weights) would otherwise give 0 / 0 = NaN.
  if (totalWeight === 0) {
    return 0;
  }

  return weightedEngagement / totalWeight;
}

/** Aggregated engagement inputs consumed by the engagement score. */
interface EngagementMetrics {
  completionRate: number;
  returnRate: number;
  bingeCoefficient: number;
  coherenceOfDropOffs: number;
}
7
/**
 * Combines aggregated engagement metrics into a 0–100 engagement score.
 *
 * Completion rate, return rate and binge coefficient are divided by their
 * targets (0.85, 0.65, 0.4) and capped at 1, then blended with weights
 * 0.4 / 0.35 / 0.15; `coherenceOfDropOffs` is added with weight 0.1. The blend
 * is scaled to 100 and multiplied by a freshness factor: 1.0 before day 30,
 * 0.8 before day 90, 0.5 afterwards.
 *
 * @param storyId - Story to score.
 * @returns Engagement score, capped at 100.
 */
function calculateEngagementScore(storyId: string): number {
  const metrics = fetchAggregatedMetrics(storyId);
  const story = getStory(storyId);

  // Normalize a metric against its target, saturating at 1
  const capAtOne = (value: number, target: number): number =>
    Math.min(1, value / target);

  const blended =
    capAtOne(metrics.completionRate, 0.85) * 0.4 +
    capAtOne(metrics.returnRate, 0.65) * 0.35 +
    capAtOne(metrics.bingeCoefficient, 0.4) * 0.15 +
    metrics.coherenceOfDropOffs * 0.1;

  // Apply temporal weighting
  const ageInDays = getDaysSince(story.publishDate);
  const freshness = ageInDays < 30 ? 1.0 : ageInDays < 90 ? 0.8 : 0.5;

  return Math.min(100, blended * 100 * freshness);
}
// Hierarchical Semantic Representation
/** Embeddings of one story at sentence, paragraph, chapter and story level. */
interface EmbeddingLevels {
  sentenceEmbeddings: number[][];
  /** Mean of all paragraph embeddings (a single vector). */
  paragraphEmbedding: number[];
  chapterEmbeddings: number[][];
  storyEmbedding: number[];
}

/**
 * Builds sentence, paragraph, chapter and story embeddings for a story.
 *
 * Paragraph vectors are an attention-weighted sum of their sentence
 * embeddings, with softmax weights derived from `calculateSemanticEntropy`.
 * Because the softmax weights already sum to 1, the weighted vectors are
 * summed rather than averaged; averaging them again would shrink each
 * paragraph vector by its sentence count. Chapter vectors are the mean of the
 * paragraph vectors in `[startIndex, endIndex)`; the story vector is the mean
 * of the chapter vectors.
 *
 * @param story - Story providing `fullText` and `chapters`.
 * @returns Embeddings at all four levels.
 */
function generateHierarchicalEmbeddings(story: Story): EmbeddingLevels {
  const sentences = tokenizeSentences(story.fullText);

  // Level 1: Sentence embeddings
  const sentenceEmbeddings = sentences.map((s) =>
    sentenceTransformer.encode(s)
  );

  // Level 2: Paragraph embeddings with attention
  const paragraphs = story.fullText.split(/\n\n+/);
  const paragraphEmbeddings = paragraphs.map((para) => {
    const paraSentences = tokenizeSentences(para);
    const sentenceEmbs = paraSentences.map((s) =>
      sentenceTransformer.encode(s)
    );

    const importanceScores = sentenceEmbs.map((emb) =>
      calculateSemanticEntropy(emb)
    );
    const attentionWeights = softmax(importanceScores);

    // Attention-weighted pooling: sum of weight * embedding
    const pooled: number[] = new Array(sentenceEmbs[0].length).fill(0);
    attentionWeights.forEach((weight, i) => {
      sentenceEmbs[i].forEach((component, d) => {
        pooled[d] += component * weight;
      });
    });

    return pooled;
  });

  // Level 3: Chapter embeddings
  const chapters = story.chapters;
  const chapterEmbeddings = chapters.map((ch) =>
    meanPooling(paragraphEmbeddings.slice(ch.startIndex, ch.endIndex))
  );

  // Level 4: Full story
  const storyEmbedding = meanPooling(chapterEmbeddings);

  return {
    sentenceEmbeddings,
    paragraphEmbedding: meanPooling(paragraphEmbeddings),
    chapterEmbeddings,
    storyEmbedding,
  };
}
53
/**
 * Component-wise mean of a list of equally sized vectors.
 *
 * @param embeddings - Vectors to average; the first one fixes the dimension.
 * @returns The mean vector, or an empty array when `embeddings` is empty.
 */
function meanPooling(embeddings: number[][]): number[] {
  if (embeddings.length === 0) {
    return []; // no vectors: reading embeddings[0].length would throw
  }

  const dimensions = embeddings[0].length;
  const result: number[] = new Array(dimensions).fill(0);

  for (const emb of embeddings) {
    for (let i = 0; i < dimensions; i++) {
      result[i] += emb[i];
    }
  }

  return result.map((v) => v / embeddings.length);
}
66
/**
 * Numerically stable softmax.
 *
 * The maximum is subtracted before exponentiating so large inputs do not
 * overflow.
 *
 * @param values - Raw scores.
 * @returns Weights summing to 1, or an empty array for empty input.
 */
function softmax(values: number[]): number[] {
  if (values.length === 0) {
    return []; // reduce() without an initial value throws on an empty array
  }

  const maxVal = values.reduce((m, v) => Math.max(m, v), -Infinity);
  const exp = values.map((v) => Math.exp(v - maxVal));
  const sum = exp.reduce((a, b) => a + b, 0);
  return exp.map((e) => e / sum);
}

/** A reference corpus: exemplar embeddings plus their centroid. */
interface CorpusBank {
  exemplarEmbeddings: number[][];
  centroid: number[];
  type: "acclaim" | "satisfaction" | "genre";
}
6
/**
 * Builds the "acclaim" reference corpus from a fixed list of award-winning works.
 *
 * Each listed work's full text is encoded with `sentenceTransformer`, and the
 * centroid is the mean of those embeddings.
 *
 * @returns A corpus bank tagged `"acclaim"`.
 */
async function initializeAcclaimCorpus(): Promise<CorpusBank> {
  const awardWinners = [
    "Embassytown by China Miéville",
    "The Fifth Season by N.K. Jemisin",
    "Too Like the Lightning by Ada Palmer",
    // ... 500+ canonically excellent works
  ];

  const encodeWork = (title: string) =>
    sentenceTransformer.encode(getFullText(title));

  const exemplarEmbeddings = awardWinners.map(encodeWork);

  return {
    exemplarEmbeddings,
    centroid: meanPooling(exemplarEmbeddings),
    type: "acclaim",
  };
}
27
/**
 * Builds the "satisfaction" reference corpus from well-received platform works.
 *
 * Queries up to 1000 works with completion rate ≥ 0.75, average rating ≥ 4.2
 * and at least 50 reviews, encodes each full text, and uses the mean embedding
 * as the centroid.
 *
 * @returns A corpus bank tagged `"satisfaction"`.
 */
async function initializeSatisfactionCorpus(): Promise<CorpusBank> {
  const selectionCriteria = {
    completionRate: { $gte: 0.75 },
    avgRating: { $gte: 4.2 },
    reviewCount: { $gte: 50 },
    limit: 1000,
  };
  const satisfactionExemplars = await queryDB(selectionCriteria);

  const exemplarEmbeddings = satisfactionExemplars.map((work) =>
    sentenceTransformer.encode(work.fullText)
  );

  return {
    exemplarEmbeddings,
    centroid: meanPooling(exemplarEmbeddings),
    type: "satisfaction",
  };
}
48
/**
 * Builds one "genre" reference corpus per genre.
 *
 * For each genre, up to 200 representative works published more than two
 * years ago are encoded and their mean embedding becomes the centroid. Genres
 * with no matching works are left out of the map, because a centroid cannot
 * be computed from zero exemplars.
 *
 * @returns Map from genre name to its corpus bank.
 */
async function initializeGenreCorpora(): Promise<Map<string, CorpusBank>> {
  const genres = getAllGenres();
  const corpora = new Map<string, CorpusBank>();

  for (const genre of genres) {
    const genreExemplars = await queryDB({
      genre,
      publishedBefore: new Date(Date.now() - 2 * 365 * 24 * 60 * 60 * 1000),
      isRepresentative: true,
      limit: 200,
    });

    if (genreExemplars.length === 0) {
      continue;
    }

    const exemplarEmbeddings = genreExemplars.map((work) =>
      sentenceTransformer.encode(work.fullText)
    );

    corpora.set(genre, {
      exemplarEmbeddings,
      centroid: meanPooling(exemplarEmbeddings),
      type: "genre",
    });
  }

  return corpora;
}

/**
 * Scores how well a story embedding aligns with the reference corpora (0–100).
 *
 * The base score is the largest of three weighted cosine similarities
 * (acclaim × 0.4, satisfaction × 0.5, genre × 0.1), scaled to 100. A novelty
 * penalty is then applied from the distance to the nearest published story
 * relative to the corpus-wide maximum distance: −15 when the ratio is below
 * 0.3 (too similar), −5 when it is above 0.8 (too different).
 *
 * No penalty is applied when there are no published embeddings or the maximum
 * distance is not positive, since novelty cannot be measured then.
 *
 * NOTE(review): the three weights sum to 1, which suggests a weighted sum may
 * have been intended instead of `Math.max`; behaviour is kept as is.
 * NOTE(review): if the published set contains the story itself, the nearest
 * distance is 0 and the "too similar" penalty always applies — confirm.
 *
 * @param storyEmbedding - Story-level embedding.
 * @param primaryGenre - Genre used to pick the genre corpus; unknown genres
 *   contribute a similarity of 0.
 * @param corpusBanks - Reference corpora.
 * @returns Alignment score clamped to [0, 100].
 */
function calculateAlignmentScore(
  storyEmbedding: number[],
  primaryGenre: string,
  corpusBanks: {
    acclaim: CorpusBank;
    satisfaction: CorpusBank;
    genres: Map<string, CorpusBank>;
  }
): number {
  // Compare against each reference bank
  const similarityToAcclaim = cosineSimilarity(
    storyEmbedding,
    corpusBanks.acclaim.centroid
  );

  const similarityToSatisfaction = cosineSimilarity(
    storyEmbedding,
    corpusBanks.satisfaction.centroid
  );

  const genreCorpus = corpusBanks.genres.get(primaryGenre);
  const similarityToGenre = genreCorpus
    ? cosineSimilarity(storyEmbedding, genreCorpus.centroid)
    : 0;

  const primarySimilarity = Math.max(
    similarityToAcclaim * 0.4,
    similarityToSatisfaction * 0.5,
    similarityToGenre * 0.1
  );

  // Novelty calculation. A loop is used instead of Math.min(...spread)
  // because spreading a large corpus can exceed the argument-count limit.
  const allPublishedEmbeddings = getAllPublishedStoryEmbeddings();
  let nearestNeighborDistance = Infinity;
  for (const emb of allPublishedEmbeddings) {
    const distance = euclideanDistance(storyEmbedding, emb);
    if (distance < nearestNeighborDistance) {
      nearestNeighborDistance = distance;
    }
  }

  const maxDistance = getMaxDistanceInCorpus();

  // Penalize if too derivative OR too alien
  let noveltyPenalty = 0;
  if (allPublishedEmbeddings.length > 0 && maxDistance > 0) {
    const noveltyScore = Math.min(1, nearestNeighborDistance / maxDistance);
    if (noveltyScore < 0.3) {
      noveltyPenalty = -15; // Too similar
    } else if (noveltyScore > 0.8) {
      noveltyPenalty = -5; // Too different
    }
  }

  const aScoreRaw = primarySimilarity * 100 + noveltyPenalty;

  return Math.max(0, Math.min(100, aScoreRaw));
}
55
/**
 * Cosine similarity of two vectors.
 *
 * Iterates over the length of `a`; `b` is expected to have the same length.
 *
 * @returns Similarity in [-1, 1], or 0 when either vector has zero magnitude
 *   (the angle is undefined there and the division would yield NaN).
 */
function cosineSimilarity(a: number[], b: number[]): number {
  let dotProduct = 0;
  let magnitudeA = 0;
  let magnitudeB = 0;

  for (let i = 0; i < a.length; i++) {
    dotProduct += a[i] * b[i];
    magnitudeA += a[i] * a[i];
    magnitudeB += b[i] * b[i];
  }

  const denominator = Math.sqrt(magnitudeA) * Math.sqrt(magnitudeB);
  if (denominator === 0) {
    return 0;
  }

  return dotProduct / denominator;
}
69
/**
 * Euclidean distance between two vectors.
 *
 * Iterates over the length of `a`; `b` is expected to have the same length.
 */
function euclideanDistance(a: number[], b: number[]): number {
  let sum = 0;
  for (let i = 0; i < a.length; i++) {
    sum += Math.pow(a[i] - b[i], 2);
  }
  return Math.sqrt(sum);
}

/**
 * Scores how reader ratings move between the midpoint and the end of a story.
 *
 * Compares the average rating at 50% progress with the average at 95%:
 * a rise above 0.5 scores 100, anything above −0.2 scores 70, and a larger
 * decline scores 40. When either checkpoint has no ratings the change cannot
 * be measured and is treated as 0, which yields 70.
 *
 * @param storyId - Story to evaluate.
 * @returns 100, 70 or 40.
 */
function calculateSatisfactionDelta(storyId: string): number {
  const ratingsAtMidpoint = getRatingsAtProgress(storyId, 0.5);
  const ratingsAtCompletion = getRatingsAtProgress(storyId, 0.95);

  // reduce() without an initial value throws on an empty array.
  if (ratingsAtMidpoint.length === 0 || ratingsAtCompletion.length === 0) {
    return 70;
  }

  const average = (ratings: number[]): number =>
    ratings.reduce((a, b) => a + b, 0) / ratings.length;

  const satisfactionDelta =
    average(ratingsAtCompletion) - average(ratingsAtMidpoint);

  if (satisfactionDelta > 0.5) {
    return 100; // Strong payoff
  } else if (satisfactionDelta > -0.2) {
    return 70; // Consistent quality
  } else {
    return 40; // Disappointing ending
  }
}

/**
 * Wave-1 quality score: structural and engagement scores plus a newness boost.
 *
 * @param storyId - Story to score.
 * @returns Score capped at 100.
 */
function calculateQualityScoreWave1(storyId: string): number {
  const story = getStory(storyId);
  const sScore = calculateStructuralScore(story, story.genre);
  const eScore = calculateEngagementScore(storyId);

  // Newness boost: new stories get visibility runway.
  // Boost value before its 0.2 weight is applied:
  //   day 0:  20
  //   day 30: ~7.4  (20 * e^-1)
  //   day 90: ~1.0  (20 * e^-3)
  const daysSincePublish = getDaysSince(story.publishDate);
  const newnessBoost = 20 * Math.exp(-daysSincePublish / 30);

  const qualityScore = sScore * 0.4 + eScore * 0.4 + newnessBoost * 0.2;

  return Math.min(100, qualityScore);
}

/**
 * Wave-2 quality score: adds corpus alignment and satisfaction delta.
 *
 * Weights: structural 0.2, engagement 0.35, alignment 0.25, satisfaction
 * delta 0.1, newness boost 0.1.
 *
 * @param storyId - Story to score.
 * @returns Score capped at 100.
 */
function calculateQualityScoreWave2(storyId: string): number {
  const story = getStory(storyId);
  const sScore = calculateStructuralScore(story, story.genre);
  const eScore = calculateEngagementScore(storyId);
  const embeddings = generateHierarchicalEmbeddings(story);
  const aScore = calculateAlignmentScore(
    embeddings.storyEmbedding,
    story.genre,
    getCorpusBanks()
  );
  const deltaScore = calculateSatisfactionDelta(storyId);

  const daysSincePublish = getDaysSince(story.publishDate);
  const newnessBoost = 20 * Math.exp(-daysSincePublish / 30);

  const qualityScore =
    sScore * 0.2 +
    eScore * 0.35 +
    aScore * 0.25 +
    deltaScore * 0.1 +
    newnessBoost * 0.1;

  return Math.min(100, qualityScore);
}

/** Shape of a cached score entry. */
interface CachedScore {
  value: number;
  updatedAt: Date;
  ttl: number;
}
6
/**
 * Folds a batch of new reader events into the cached engagement score.
 *
 * The update is an exponential moving average with learning rate
 * `alpha = 1 / (1 + totalReaders)`, so the score stabilises as the reader
 * count grows. When no cached score exists (first run or expired entry) the
 * batch value is used directly. Blending against an implicit 0 with a small
 * `alpha` would collapse the score of an established story to near zero.
 *
 * @param storyId - Story whose score is updated.
 * @param newEvents - Events received since the last update.
 * @returns The updated score, which is also written back to the cache.
 */
async function updateEngagementScoreIncremental(
  storyId: string,
  newEvents: ReaderEvent[]
): Promise<number> {
  const cached = await cache.get<CachedScore>(`e_score:${storyId}`);
  const oldEScore = cached?.value;

  // Calculate new data impact
  const newEngagementValue = processEvents(newEvents);

  // Learning rate decreases as the historical reader count grows
  const totalReadersHistorical = countTotalReaders(storyId);
  const alpha = 1.0 / (1.0 + totalReadersHistorical);

  // Exponential moving average, seeded with the batch value on a cache miss
  const newEScore =
    oldEScore === undefined || oldEScore === null
      ? newEngagementValue
      : alpha * newEngagementValue + (1 - alpha) * oldEScore;

  // Cache entry records a 5-minute TTL (300 s)
  await cache.set(`e_score:${storyId}`, {
    value: newEScore,
    updatedAt: new Date(),
    ttl: 300,
  });

  return newEScore;
}
// Genre Affinity Calculation
/**
 * How much more (or less) a user completes stories of a genre than the platform average.
 *
 * Computed as `userCompletion / platformAverage - 1`, clamped to [-0.5, 1.0]:
 *   -0.5 = never finishes this genre (floor)
 *    0.0 = platform average
 *    1.0 = at least twice the platform average (cap)
 *
 * @param userId - Reader to evaluate.
 * @param storyGenre - Genre of the story being ranked.
 * @returns Affinity in [-0.5, 1.0]; 0 (neutral) when the platform average is
 *   not positive and the ratio is therefore undefined.
 */
function calculateGenreAffinityMultiplier(
  userId: string,
  storyGenre: string
): number {
  const userCompletionInGenre = getCompletionRate(userId, storyGenre);
  const platformAvgCompletion = getPlatformAverage(storyGenre);

  // Without a baseline the ratio would be NaN or Infinity.
  if (!(platformAvgCompletion > 0)) {
    return 0;
  }

  // Higher ratio = user really likes this genre
  const affinityRatio = userCompletionInGenre / platformAvgCompletion;

  return Math.max(-0.5, Math.min(1.0, affinityRatio - 1.0));
}
17
/**
 * Adjusts a story's quality score for one reader.
 *
 * The score is multiplied by `1 + genreAffinity * 0.2` and by the reader's
 * trust coefficient.
 *
 * @param qualityScore - Global quality score of the story.
 * @param userId - Reader the ranking is personalised for.
 * @param story - Story being ranked; its `genre` drives the affinity lookup.
 * @returns Personalised ranking score.
 */
function personalizeRankingScore(
  qualityScore: number,
  userId: string,
  story: Story
): number {
  const affinity = calculateGenreAffinityMultiplier(userId, story.genre);
  const trust = calculateReaderTrustCoefficient(userId);

  const affinityFactor = 1 + affinity * 0.2;
  return qualityScore * affinityFactor * trust;
}

/**
 * Phase-1 rank for a newly published story: 80% structural score plus 20% of
 * the maximum newness boost (20).
 *
 * @param storyId - Story to rank.
 * @returns Initial rank.
 */
function rankNewStoryPhase1(storyId: string): number {
  const MAX_NEWNESS_BOOST = 20;

  const story = getStory(storyId);
  const structural = calculateStructuralScore(story, story.genre);

  return structural * 0.8 + MAX_NEWNESS_BOOST * 0.2;
}
12
13// Example:
14// Well-written thriller: 85/100 craft
15// Initial rank: 85 * 0.8 + 20 * 0.2 = 72
16// Gets visibility in recommendation feed
Guaranteed Initial Exposure
/** Reader ids drawn for each stratum of the initial-exposure sample. */
interface StratifiedSample {
  genreVeterans: string[];
  genreExplorers: string[];
  speedReaders: string[];
  carefulReaders: string[];
}

/**
 * Puts a new story into the discovery feed of a stratified reader sample.
 *
 * Draws 40 genre veterans, 30 genre explorers, 15 speed readers and 15
 * careful readers, then adds the story as "featured" to each reader's feed.
 * A reader who falls into more than one stratum is only added once.
 *
 * @param storyId - Story to distribute.
 */
async function distributeNewStoryToSampleReaders(
  storyId: string
): Promise<void> {
  const story = getStory(storyId);

  // Sample diverse readers
  const sampleReaders: StratifiedSample = {
    genreVeterans: await stratifiedSample("genre_veterans", story.genre, 40),
    genreExplorers: await stratifiedSample("genre_explorers", "other", 30),
    speedReaders: await stratifiedSample("speed_readers", "fast", 15),
    carefulReaders: await stratifiedSample("careful_readers", "slow", 15),
  };

  // A Set keeps first-seen order and drops readers sampled in several strata.
  const uniqueReaders = new Set<string>(Object.values(sampleReaders).flat());

  // Distribute story to sample
  for (const readerId of uniqueReaders) {
    await addToDiscoveryFeed(readerId, storyId, "featured");
  }
}

/**
 * Phase-2 rank: shifts weight from structural to engagement score between
 * day 8 and day 30 after publication.
 *
 * Progress through the 22-day window is clamped to [0, 1], so the structural
 * weight stays within 0.7 → 0.2 and the engagement weight within 0.3 → 0.8
 * even when the function is called outside the window. A decaying newness
 * boost is added with weight 0.1.
 *
 * @param storyId - Story to rank.
 * @param daysSincePublish - Age of the story in days.
 * @returns Phase-2 rank.
 */
function rankStoryPhase2(storyId: string, daysSincePublish: number): number {
  const story = getStory(storyId);
  const sScore = calculateStructuralScore(story, story.genre);
  const eScore = calculateEngagementScore(storyId);

  // Linear interpolation over 22 days
  const daysIntoPhase2 = daysSincePublish - 8;
  const progress = Math.max(0, Math.min(1, daysIntoPhase2 / 22));

  const sWeight = 0.7 - progress * 0.5; // 0.7 → 0.2
  const eWeight = 0.3 + progress * 0.5; // 0.3 → 0.8

  const newnessBoost = 20 * Math.exp(-daysSincePublish / 30);

  const phase2Rank = sScore * sWeight + eScore * eWeight + newnessBoost * 0.1;

  return phase2Rank;
}
19
20// Example timeline:
21// Day 8: S=70%, E=30% (early engagement signal weak)
22// Day 17: S≈50%, E≈50% (balanced)
// Day 30: S=20%, E=80% (engagement dominates)

/**
 * Phase-3 rank: mature stories are ranked by the wave-2 quality score.
 *
 * @param storyId - Story to rank.
 * @returns The wave-2 quality score.
 */
function rankStoryPhase3(storyId: string): number {
  const matureRank = calculateQualityScoreWave2(storyId);
  return matureRank;
}

/** Per-signal suspicion values combined by the anomaly detector. */
interface AnomalyMetrics {
  readTimeUniformity: number;
  interactionClustering: number;
  engagementDeviation: number;
  velocityAnomalies: number;
  deviceConsistency: number;
  geographicClustering: number;
}
9
/**
 * Decides how much weight a batch of engagement events should receive.
 *
 * Six detector outputs are blended with weights 0.25 / 0.25 / 0.2 / 0.15 /
 * 0.1 / 0.05. When the blend exceeds `ANOMALY_THRESHOLD` the story is flagged
 * for review and its engagement is halved.
 *
 * @param storyId - Story the events belong to.
 * @param newEvents - Events to inspect.
 * @returns Engagement weight: 0.5 when flagged, otherwise 1.0.
 */
function detectAnomalousEngagement(
  storyId: string,
  newEvents: ReaderEvent[]
): number {
  const metrics: AnomalyMetrics = {
    readTimeUniformity: calculateReadTimeVariance(newEvents),
    interactionClustering: detectBurstPatterns(newEvents),
    engagementDeviation: compareEngagementToPeers(storyId, newEvents),
    // Was misspelt `detectUnnaturaScrollPatterns`, which is not defined.
    velocityAnomalies: detectUnnaturalScrollPatterns(newEvents),
    deviceConsistency: detectSameDeviceUsage(newEvents),
    geographicClustering: detectIPConcentration(newEvents),
  };

  const suspiciousScore =
    metrics.readTimeUniformity * 0.25 +
    metrics.interactionClustering * 0.25 +
    metrics.engagementDeviation * 0.2 +
    metrics.velocityAnomalies * 0.15 +
    metrics.deviceConsistency * 0.1 +
    metrics.geographicClustering * 0.05;

  if (suspiciousScore > ANOMALY_THRESHOLD) {
    flagForReview(storyId, "Anomalous engagement pattern", suspiciousScore);
    return 0.5; // 50% weight for engagement
  }

  return 1.0; // Normal engagement
}
// Specific Anomaly Detectors
/**
 * Flags suspiciously uniform paragraph read times.
 *
 * A read time is the gap between a "view_paragraph" event and the event that
 * follows it. The coefficient of variation (std dev / mean) of those gaps is
 * compared with 0.3: real readers vary a lot (roughly 0.8–1.2), bots very
 * little (0.1–0.3).
 *
 * @param events - Events of the batch, in chronological order.
 * @returns 0.9 when the variation is below 0.3, otherwise 0.0. Also 0.0 when
 *   fewer than two read times are available or their mean is not positive,
 *   because variation cannot be judged then.
 */
function calculateReadTimeVariance(events: ReaderEvent[]): number {
  const readTimes: number[] = [];
  for (let i = 0; i < events.length - 1; i++) {
    // Compared as a plain string so the check holds for any event-type union.
    if ((events[i].eventType as string) === "view_paragraph") {
      readTimes.push(events[i + 1].timestamp - events[i].timestamp);
    }
  }

  // One sample always has zero variance and must not be read as bot-like;
  // zero samples would make reduce() throw.
  if (readTimes.length < 2) {
    return 0.0;
  }

  const mean = readTimes.reduce((a, b) => a + b, 0) / readTimes.length;
  if (!(mean > 0)) {
    return 0.0;
  }

  const variance =
    readTimes.reduce((sum, t) => sum + Math.pow(t - mean, 2), 0) /
    readTimes.length;
  const coefficientOfVariation = Math.sqrt(variance) / mean;

  if (coefficientOfVariation < 0.3) {
    return 0.9; // Highly suspicious
  }
  return 0.0; // Normal
}
23
/**
 * Flags event batches that are concentrated in a few time buckets.
 *
 * Events are bucketed by `floor(timestamp / 3600)` and the Gini coefficient
 * of the bucket counts is computed (0 = evenly spread, 1 = fully
 * concentrated).
 *
 * NOTE(review): the divisor gives hourly buckets only if `timestamp` is in
 * seconds; with millisecond timestamps each bucket spans 3.6 s — confirm.
 *
 * @param events - Events to inspect.
 * @returns 0.85 when the Gini coefficient exceeds 0.7, otherwise 0.0
 *   (including for an empty batch).
 */
function detectBurstPatterns(events: ReaderEvent[]): number {
  if (events.length === 0) {
    return 0.0;
  }

  const hourlyBuckets = new Map<number, number>();

  for (const event of events) {
    const hourKey = Math.floor(event.timestamp / 3600);
    hourlyBuckets.set(hourKey, (hourlyBuckets.get(hourKey) ?? 0) + 1);
  }

  const eventCounts = Array.from(hourlyBuckets.values());

  // Gini coefficient: 0=perfect equality, 1=perfect inequality
  const gini = calculateGiniCoefficient(eventCounts);

  // Coordinated: high Gini (0.7+)
  // Distributed: low Gini (0.3-)
  if (gini > 0.7) {
    return 0.85; // Likely coordinated
  }
  return 0.0; // Normal distribution
}
45
/**
 * Flags scroll traces that repeat the same one or two step sizes.
 *
 * Collects `scrollY` from "scroll" events, takes the difference between
 * consecutive positions, and counts the distinct differences.
 *
 * @param events - Events to inspect.
 * @returns 0.9 when there are more than 20 steps but fewer than 3 distinct
 *   step sizes, otherwise 0.0.
 */
function detectUnnaturalScrollPatterns(events: ReaderEvent[]): number {
  const offsets: any[] = [];
  for (const e of events) {
    if (e.eventType === "scroll") {
      offsets.push((e as any).scrollY);
    }
  }

  const distinctSteps = new Set<number>();
  let stepCount = 0;
  for (let i = 1; i < offsets.length; i++) {
    distinctSteps.add(offsets[i] - offsets[i - 1]);
    stepCount += 1;
  }

  // Only 1-2 different patterns across a long trace
  const looksScripted = stepCount > 20 && distinctSteps.size < 3;
  return looksScripted ? 0.9 : 0.0;
}
63
/**
 * Gini coefficient of a list of non-negative values.
 *
 * @param values - Values to measure; the input array is not modified.
 * @returns 0 for perfect equality, approaching 1 for full concentration.
 *   Returns 0 for an empty list or a zero mean, where the coefficient is
 *   undefined.
 */
function calculateGiniCoefficient(values: number[]): number {
  const n = values.length;
  if (n === 0) {
    return 0;
  }

  const sorted = [...values].sort((a, b) => a - b);
  const mean = sorted.reduce((a, b) => a + b, 0) / n;
  if (mean === 0) {
    return 0;
  }

  let sum = 0;
  for (let i = 0; i < n; i++) {
    sum += (2 * (i + 1) - n - 1) * sorted[i];
  }

  return sum / (n * n * mean);
}

/**
 * Trust multiplier for a reader, in [0.5, 2.0].
 *
 * Blends four sub-scores, each capped at 1:
 *   - account age (30-day months since signup, full score after 12): 0.3
 *   - completion rate relative to 0.7: 0.4
 *   - rating spread (std dev relative to 1.5): 0.2
 *   - average comment length (full score at 200 characters): 0.1
 * A sub-score is 0 when its data is missing (no stories started, no ratings,
 * no comments) instead of producing NaN or throwing.
 *
 * @param readerId - Reader to evaluate.
 * @returns `0.5 + blend * 1.5`.
 */
function calculateReaderTrustCoefficient(readerId: string): number {
  const reader = getReader(readerId);

  // Account age in months since signup
  const accountAgeMonths =
    (Date.now() - reader.signupDate.getTime()) / (30 * 24 * 60 * 60 * 1000);
  const ageScore = Math.min(1, accountAgeMonths / 12); // Cap at 12 months

  // Reading history (completion rate)
  const completionRate =
    reader.storiesStarted > 0
      ? reader.storiesCompleted / reader.storiesStarted
      : 0;
  const completionScore = Math.min(1, completionRate / 0.7);

  // Rating variance
  const ratings = getReaderRatings(readerId);
  let varianceScore = 0;
  if (ratings.length > 0) {
    const mean = ratings.reduce((a, b) => a + b, 0) / ratings.length;
    const variance =
      ratings.reduce((sum, r) => sum + Math.pow(r - mean, 2), 0) /
      ratings.length;
    varianceScore = Math.min(1, Math.sqrt(variance) / 1.5);
  }

  // Comment quality
  const comments = getReaderComments(readerId);
  let feedbackScore = 0;
  if (comments.length > 0) {
    const avgCommentLength =
      comments.reduce((sum, c) => sum + c.length, 0) / comments.length;
    feedbackScore = Math.min(1, (avgCommentLength / 100) * 0.5);
  }

  // Composite trust
  const trustCoefficient =
    ageScore * 0.3 +
    completionScore * 0.4 +
    varianceScore * 0.2 +
    feedbackScore * 0.1;

  // Map to multiplier: [0.5, 2.0]
  return 0.5 + trustCoefficient * 1.5;
}
// Application to Engagement
/**
 * Average engagement per event, weighted by each reader's trust coefficient.
 *
 * Every event has a base value of 1.0, so the result is the mean trust
 * coefficient over all events. The coefficient is computed once per reader
 * and reused for that reader's other events.
 *
 * @param storyId - Story to evaluate.
 * @returns The weighted average, or 0 when the story has no engagement events.
 */
function calculateWeightedEngagementScore(storyId: string): number {
  const allEngagements = getAllEngagementEvents(storyId);
  if (allEngagements.length === 0) {
    return 0; // avoid 0 / 0 = NaN
  }

  const trustByReader = new Map<string, number>();

  let weightedTotal = 0;
  for (const engagement of allEngagements) {
    let trustCoeff = trustByReader.get(engagement.readerId);
    if (trustCoeff === undefined) {
      trustCoeff = calculateReaderTrustCoefficient(engagement.readerId);
      trustByReader.set(engagement.readerId, trustCoeff);
    }

    const engagementValue = 1.0; // Base value
    weightedTotal += engagementValue * trustCoeff;
  }

  return weightedTotal / allEngagements.length;
}

/**
 * Names of decoy metrics that are tracked but not meant to reflect quality.
 *
 * @returns The fixed list of honeypot metric names.
 */
function initializeHoneypotMetrics(): string[] {
  return [
    "paragraph_11_completion", // Not correlated with quality
    "monday_engagement_bonus", // Arbitrary temporal signal
    "first_sentence_length_ratio", // Too game-able
    "dialogue_percentage_drift", // Shouldn't matter after Chapter 1
  ];
}
9
/**
 * Returns the score weights with a ±10% jitter that is fixed per calendar month.
 *
 * Each base weight is multiplied by a factor in [0.9, 1.1) and the result is
 * renormalised to sum to 1. The factors come from a small deterministic
 * generator seeded with the year and month of `now`, so every call within the
 * same month yields the same weights. Previously the seed was computed but
 * `Math.random()` was used, which changed the weights on every call.
 *
 * NOTE(review): a date-only seed is predictable to anyone who knows the
 * scheme; mix in a server-side secret if the jitter must not be guessable.
 *
 * @param now - Date that selects the month; defaults to the current date.
 * @returns Weights keyed by metric name, summing to 1.
 */
function randomizeMetricWeightsMonthly(
  now: Date = new Date()
): Record<string, number> {
  const baseWeights: Record<string, number> = {
    s_score: 0.2,
    e_score: 0.35,
    a_score: 0.25,
    delta_score: 0.1,
    newness: 0.1,
  };

  // Year is included so January 2025 and January 2026 get different jitter.
  const seed = now.getFullYear() * 12 + now.getMonth();

  // mulberry32: tiny deterministic PRNG returning values in [0, 1)
  let state = seed >>> 0;
  const nextRandom = (): number => {
    state = (state + 0x6d2b79f5) >>> 0;
    let t = state;
    t = Math.imul(t ^ (t >>> 15), t | 1);
    t ^= t + Math.imul(t ^ (t >>> 7), t | 61);
    return ((t ^ (t >>> 14)) >>> 0) / 4294967296;
  };

  // Monthly jitter (±10%) applied to each base weight
  const adjustedWeights: Record<string, number> = {};
  for (const key in baseWeights) {
    const jitter = 0.9 + nextRandom() * 0.2; // [0.9, 1.1)
    adjustedWeights[key] = baseWeights[key] * jitter;
  }

  // Renormalize to sum to 1.0
  const total = Object.values(adjustedWeights).reduce((a, b) => a + b, 0);
  for (const key in adjustedWeights) {
    adjustedWeights[key] /= total;
  }

  return adjustedWeights;
}
// Event ingestion: <100ms from user action to Kafka
Stream processing: <30s aggregate window to score update
Cache refresh: <5 minutes for engagement scores
Ranking query: <50ms personalized rank per user
API response: <200ms discovery feed with network
/** Everything the author dashboard renders for one story. */
interface AuthorDashboardData {
  overallScore: number;
  rankInGenre: number;
  percentile: number;
  trend: "up" | "down" | "stable";
  /** Structural score with its per-component breakdown. */
  structural: ComponentBreakdown;
  engagement: EngagementBreakdown;
  actionableInsights: Insight[];
  completionHeatmap: number[][];
}

/** Structural score and the components it is built from. */
interface ComponentBreakdown {
  score: number;
  percentile: number;
  components: {
    lexicalDiversity: ComponentScore;
    pacing: ComponentScore;
    dialogue: ComponentScore;
    complexity: ComponentScore;
  };
}

/** One scored component with its maximum, percentile and feedback text. */
interface ComponentScore {
  score: number;
  max: number;
  percentile: number;
  feedback: string;
}
29
/** One actionable finding shown on the author dashboard. */
interface Insight {
  priority: "high" | "medium" | "low";
  issue: string;
  location: string;
  description: string;
  possibleCauses: string[];
  suggestions: string[];
  similarExamples: SimilarStoryExample[];
}

/**
 * Percentile ranks of a story's scores among comparable stories.
 *
 * Comparable stories share the genre, are within ±20% of the word count, and
 * were published within ±30 days of the story. Each of the story's scores is
 * ranked against the same score of its peers. Previously the peer story
 * objects themselves were passed where a list of numbers is expected.
 *
 * NOTE(review): assumes peers expose `sScore`, `eScore` and `qualityScore`
 * like `story` does, and that `queryDB` returns an array synchronously here
 * (other call sites `await` it) — confirm both.
 *
 * @param storyId - Story to benchmark.
 * @returns Percentiles keyed `structural_score`, `engagement_score`, `overall_score`.
 */
function generateBenchmarkComparison(storyId: string): Record<string, number> {
  const story = getStory(storyId);

  const comparableStories = queryDB({
    genre: story.genre,
    wordCountMin: story.wordCount * 0.8,
    wordCountMax: story.wordCount * 1.2,
    publishedDaysMin: story.daysSincePublish - 30,
    publishedDaysMax: story.daysSincePublish + 30,
    limit: 500,
  });

  // Extract one numeric score per peer for the percentile calculation
  const peerScores = (pick: (peer: any) => number): number[] =>
    comparableStories.map(pick);

  return {
    structural_score: calculatePercentileRank(
      story.sScore,
      peerScores((peer) => peer.sScore)
    ),
    engagement_score: calculatePercentileRank(
      story.eScore,
      peerScores((peer) => peer.eScore)
    ),
    overall_score: calculatePercentileRank(
      story.qualityScore,
      peerScores((peer) => peer.qualityScore)
    ),
  };
}
22
/**
 * Percentage of `values` that are strictly below `value`.
 *
 * The input array is left untouched. A value above every element ranks at
 * 100, and an empty comparison set ranks at 0.
 *
 * @param value  Score to rank.
 * @param values Scores to rank against.
 * @returns A percentile in the range [0, 100].
 */
function calculatePercentileRank(value: number, values: number[]): number {
  if (values.length === 0) {
    return 0;
  }

  // Counting elements below `value` equals the index of the first element
  // >= value in sorted order, without sorting or mutating the caller's array.
  const below = values.reduce((count, v) => (v < value ? count + 1 : count), 0);
  return (below / values.length) * 100;
}

/**
 * Derives a stable pseudonymous identifier from a raw reader ID.
 *
 * The ID is concatenated with the salt from the `ANONYMIZATION_SALT`
 * environment variable, hashed with SHA-256, and truncated to 16 hex
 * characters.
 *
 * @param rawReaderId Reader identifier to pseudonymize.
 * @returns The first 16 hex characters of the salted SHA-256 digest.
 * @throws Error if `ANONYMIZATION_SALT` is unset or empty; a well-known
 *         fallback salt would let anyone recompute the mapping.
 */
function anonymizeReaderId(rawReaderId: string): string {
  const salt = process.env.ANONYMIZATION_SALT;
  if (!salt) {
    throw new Error("ANONYMIZATION_SALT must be set to anonymize reader IDs");
  }

  const hash = crypto
    .createHash("sha256")
    .update(rawReaderId + salt)
    .digest("hex");

  return hash.substring(0, 16); // 16 hex characters = 64-bit identifier
}

/**
 * Retention period per data category. Consumers treat numeric values as a
 * number of days and `null` as "never delete".
 */
interface RetentionPolicy {
  rawEvents: number;
  personalIdentifiers: number;
  /** Always `null`: aggregated metrics are exempt from deletion. */
  aggregatedMetrics: null;
  errorLogs: number;
}
7
/**
 * Retention periods applied by the retention job. Numeric values are used as
 * a day count; `null` entries are skipped, so that data is never deleted.
 * Key order is the order in which categories are processed.
 */
const retentionPolicies: RetentionPolicy = {
  rawEvents: 90,
  personalIdentifiers: 30,
  aggregatedMetrics: null, // exempt from deletion
  errorLogs: 14,
};
14
/**
 * Deletes records older than the configured retention period for every data
 * category in `retentionPolicies`.
 *
 * Categories whose retention is `null` are skipped. Categories are processed
 * one at a time, each deletion awaited before the next starts.
 */
async function enforceDataRetention(): Promise<void> {
  for (const [dataType, retentionDays] of Object.entries(retentionPolicies)) {
    if (retentionDays === null) {
      continue; // null means "keep forever"
    }

    // Cutoff is "now minus retentionDays", using local-calendar day arithmetic.
    const cutoffDate = new Date();
    cutoffDate.setDate(cutoffDate.getDate() - retentionDays);

    await deleteWhere(dataType, {
      createdAt: { $lt: cutoffDate },
    });
  }
}

/**
 * Builds the JSON export an author can download for one story.
 *
 * The export contains story metadata, aggregated metrics, dashboard scores,
 * reviews/comments and completion statistics. It deliberately excludes
 * individual reader identities, personal user data beyond anonymized IDs,
 * and IP addresses or geolocation.
 *
 * @param storyId Identifier of the story to export.
 * @returns Pretty-printed JSON (2-space indent).
 */
async function exportAuthorData(storyId: string): Promise<string> {
  // Await every lookup: if any of them returns a Promise, serializing it
  // directly would emit `{}` and hide failures. Plain values pass through.
  const [
    storyMetadata,
    aggregatedMetrics,
    dashboardScores,
    userFeedback,
    completionStatistics,
  ] = await Promise.all([
    getStoryMetadata(storyId),
    getAggregatedMetrics(storyId),
    getDashboardData(storyId),
    getReviewsAndComments(storyId),
    getCompletionStats(storyId),
  ]);

  const visibleData = {
    storyMetadata,
    aggregatedMetrics,
    dashboardScores,
    userFeedback,
    completionStatistics,
  };

  // NOT included:
  // - Individual reader identities
  // - Personal user data beyond anonymized IDs
  // - IP addresses or geolocation

  return JSON.stringify(visibleData, null, 2);
}

/**
 * Stub for character-arc analysis. The steps named in the comments are not
 * implemented here: `storyId` is unused and the result is always empty.
 *
 * @param storyId Identifier of the story to analyze (currently unused).
 * @returns A promise resolving to an empty list.
 */
function analyzeCharacterArcs(storyId: string): Promise<CharacterAnalysis[]> {
  // NER + coreference resolution
  // Track character sentiment over narrative time
  // Identify positive/negative arcs
  return Promise.resolve([]);
}
7
// Stub for plot-structure analysis of one story. The steps named in the
// comments below are not implemented in this listing: `storyId` is never
// read, and the returned promise resolves to an empty object cast to
// PlotAnalysis, so callers receive none of PlotAnalysis's fields.
8function analyzePlotStructure(storyId: string): Promise<PlotAnalysis> {
9 // Scene extraction + scene summary
10 // Narrative arc fitting
11 // Tension curve visualization
12 return Promise.resolve({} as PlotAnalysis);
13}Nova's algorithm is fundamentally transparent:
The goal: reward genuine quality, not gaming sophistication.