Files
qds/app/Services/GeminiChatbotService.php
dhanabalan 203d09712b
Some checks failed
Scan for leaked secrets using Kingfisher / kingfisher-secrets-scan (push) Has been cancelled
Gemini PR Review / Gemini PR Review (pull_request) Has been cancelled
Scan for leaked secrets using Kingfisher / kingfisher-secrets-scan (pull_request) Has been cancelled
Laravel Larastan / larastan (pull_request) Has been cancelled
Laravel Pint / pint (pull_request) Has been cancelled
Added chat bot pages
2026-05-20 11:21:57 +05:30

820 lines
35 KiB
PHP

<?php
namespace App\Services;
use Illuminate\Support\Facades\DB;
use Illuminate\Support\Facades\Http;
use Illuminate\Support\Facades\Log;
/**
* GeminiChatbotService
* ─────────────────────────────────────────────────────────────────────────────
* Powers the "Advanced" chatbot mode with plain-English understanding.
*
* FLOW:
* 1. Takes the full chat history + new user message.
* 2. Sends them to Gemini with a structured system prompt.
* 3. Gemini returns JSON: { task, params, missing, clarification }.
* 4. If task is a known, complete query → runs the matching DB handler.
* 5. If task is "unknown" → returns the classifier's own clarification text when it
* produced one; otherwise sends the message to Gemini again as a free-form
* conversational assistant so it can answer or ask the user what they need.
* 6. If required params are missing → Gemini's clarification question is returned.
*
* SUPPORTED TASKS:
* - invoice_status → scan status of an invoice number
* - invoice_report → serial/material invoice type for an item + plant
* - production_report → production count for a plant / line / date range
* - unknown → handled via a free-form Gemini conversation turn
*
* CONFIGURATION (.env):
* GEMINI_API_KEY=your_key_here
* GEMINI_MODEL=gemini-2.5-flash-preview ← set whichever model your key supports
*
* HOW TO ADD A NEW TASK:
* 1. Describe it in buildSystemPrompt() under TASKS.
* 2. Add a private handle*() method below.
* 3. Add a match arm in processMessage().
*/
class GeminiChatbotService
{
private string $apiKey;
/**
* Full REST endpoint — model name is part of the URL path, e.g.:
* https://generativelanguage.googleapis.com/v1beta/models/gemini-2.5-flash-preview:generateContent
*
* Built in __construct() from config('services.gemini.model').
* Change GEMINI_MODEL in .env to switch models without editing code.
*/
private string $apiUrl;
/**
 * Read the Gemini API key and model name from config and build the endpoint URL.
 *
 * Values are normalised defensively: config() may hand back null when the key
 * is defined in the config file but the underlying env variable is unset, and
 * assigning null to a non-nullable string property raises a TypeError. Falling
 * back to '' keeps processMessage()'s "not configured" notice reachable.
 * A blank model name falls back to the built-in default so the URL stays valid.
 */
public function __construct()
{
    $apiKey = config('services.gemini.api_key');
    $this->apiKey = is_string($apiKey) ? $apiKey : '';

    $model = config('services.gemini.model');
    if (! is_string($model) || trim($model) === '') {
        // Default used when GEMINI_MODEL is not configured.
        $model = 'gemini-3-flash-preview';
    }
    $model = trim($model);

    $this->apiUrl = "https://generativelanguage.googleapis.com/v1beta/models/{$model}:generateContent";
}
// ─────────────────────────────────────────────────────────────────────────
// Public entry point
// ─────────────────────────────────────────────────────────────────────────
/**
 * Process a user message in the context of the prior conversation.
 *
 * Steps:
 *  1. Ask Gemini to classify the message (task + params + missing + clarification).
 *  2. If required params are missing and a clarification question exists, return it.
 *  3. Otherwise dispatch to the matching task handler; anything unrecognised is
 *     treated as "unknown".
 *
 * The classification comes from an LLM, so every field is type-checked before
 * use: handlers are declared with `array` / `?string` parameters and would raise
 * an uncaught TypeError if handed a differently-shaped value.
 *
 * @param array  $chatHistory Previous turns: [['role'=>'user'|'assistant','content'=>'…'], …]
 * @param string $userInput   The new message just typed.
 * @return string Plain-text reply to show in the chat bubble.
 */
public function processMessage(array $chatHistory, string $userInput): string
{
    if (empty($this->apiKey)) {
        return '⚠️ AI features are not configured. Please set GEMINI_API_KEY in your .env file.';
    }

    // ── Step 1: Classify the intent ───────────────────────────────────────
    try {
        $classification = $this->classifyWithGemini($chatHistory, $userInput);
    } catch (\RuntimeException $e) {
        // classifyWithGemini() throws with a user-facing message; show it as-is.
        return $e->getMessage();
    }

    // ── Normalise the model's output into the types the handlers expect ───
    $task = $classification['task'] ?? 'unknown';
    if (! is_string($task)) {
        $task = 'unknown';
    }

    $params = $classification['params'] ?? [];
    if (! is_array($params)) {
        $params = [];
    }
    // Drop null / nested values and cast the rest to string so the handlers'
    // trim() calls and `??` defaults behave predictably.
    $params = array_map(
        static fn ($value): string => (string) $value,
        array_filter($params, static fn ($value): bool => is_scalar($value))
    );

    $missing = $classification['missing'] ?? [];

    $clarification = $classification['clarification'] ?? null;
    if (! is_string($clarification) || trim($clarification) === '') {
        $clarification = null;
    }

    // ── Step 2: Missing required params → ask user for them ───────────────
    if (! empty($missing) && $clarification !== null) {
        return $clarification;
    }

    // ── Step 3: Dispatch to the appropriate handler ───────────────────────
    return match ($task) {
        'invoice_status' => $this->handleInvoiceStatus($params),
        'invoice_report' => $this->handleInvoiceReport($params),
        'production_report' => $this->handleProductionReport($params),
        // Explicit "unknown": the classifier's clarification (if any) is reused.
        'unknown' => $this->handleUnknown($chatHistory, $userInput, $clarification),
        // Any task name we do not recognise: ignore the clarification and
        // let the conversational fallback answer.
        default => $this->handleUnknown($chatHistory, $userInput, null),
    };
}
// ─────────────────────────────────────────────────────────────────────────
// Gemini API calls
// ─────────────────────────────────────────────────────────────────────────
/**
 * Phase 1 — classify the user's intent and extract structured params.
 *
 * Sends the classification system prompt plus the recent conversation to
 * Gemini with a JSON response MIME type and a low temperature, then decodes
 * the returned text.
 *
 * The API key is sent in the `x-goog-api-key` header rather than the URL query
 * string: transport-level exception messages typically include the request URL,
 * and that message is both logged and surfaced in the chat bubble below.
 *
 * @param array  $history   Prior chat turns (see buildGeminiContents()).
 * @param string $userInput The new user message.
 * @return array Decoded JSON object from the model. Only "is an array" is
 *               guaranteed here; callers must validate individual fields.
 *
 * @throws \RuntimeException with a user-facing message describing exactly what failed
 *                           (network failure, HTTP error, empty/blocked response,
 *                           or a response that is not a JSON object).
 */
private function classifyWithGemini(array $history, string $userInput): array
{
    $requestBody = [
        'system_instruction' => [
            'parts' => [['text' => $this->buildSystemPrompt()]],
        ],
        'contents' => $this->buildGeminiContents($history, $userInput),
        'generationConfig' => [
            'temperature' => 0.1,
            'responseMimeType' => 'application/json',
        ],
    ];

    // ── HTTP call ─────────────────────────────────────────────────────────
    try {
        $response = Http::withHeaders([
            'Content-Type' => 'application/json',
            'x-goog-api-key' => $this->apiKey,
        ])
            ->timeout(15)
            ->post($this->apiUrl, $requestBody);
    } catch (\Exception $e) {
        Log::error('GeminiChatbotService: HTTP exception', ['error' => $e->getMessage()]);
        throw new \RuntimeException(
            '⚠️ Could not reach the Gemini API. Check your network or firewall. ('
            . $e->getMessage() . ')',
            0,
            $e
        );
    }

    // ── HTTP-level error ──────────────────────────────────────────────────
    if (! $response->successful()) {
        $status = $response->status();
        $body = $response->body();
        Log::error('GeminiChatbotService: API HTTP error', [
            'status' => $status,
            'body' => $body,
        ]);
        // Prefer Google's structured error message; fall back to the raw body.
        $googleMsg = $response->json('error.message') ?? $body;
        throw new \RuntimeException(
            "⚠️ Gemini API returned HTTP {$status}: {$googleMsg}"
        );
    }

    // ── Extract text from response ────────────────────────────────────────
    $text = $response->json('candidates.0.content.parts.0.text');
    if (! is_string($text) || trim($text) === '') {
        // No usable text: report why, if the API told us.
        $blockReason = $response->json('promptFeedback.blockReason');
        $finishReason = $response->json('candidates.0.finishReason');
        Log::error('GeminiChatbotService: empty response text', [
            'blockReason' => $blockReason,
            'finishReason' => $finishReason,
            'full' => $response->json(),
        ]);
        $hint = $blockReason
            ? "prompt was blocked (reason: {$blockReason})"
            : ($finishReason ? "finish reason: {$finishReason}" : 'no text returned');
        throw new \RuntimeException("⚠️ Gemini returned no content — {$hint}");
    }

    // ── JSON decode ───────────────────────────────────────────────────────
    // Strip an optional markdown fence (``` or ```json) the model may add
    // despite the instructions.
    $clean = preg_replace('/^```(?:json)?\s*/i', '', trim($text)) ?? trim($text);
    $clean = preg_replace('/\s*```$/', '', $clean) ?? $clean;
    $parsed = json_decode($clean, true);

    // Valid JSON that is not an object/array (null, a number, a bare string)
    // is just as unusable as a syntax error, and would violate the return type.
    if (json_last_error() !== JSON_ERROR_NONE || ! is_array($parsed)) {
        Log::error('GeminiChatbotService: JSON decode failed', ['raw' => $text]);
        throw new \RuntimeException(
            '⚠️ Gemini returned a non-JSON response: ' . mb_substr($text, 0, 200)
        );
    }

    return $parsed;
}
/**
 * Phase 2 (unknown task only) — ask Gemini to respond conversationally.
 *
 * Sends the recent chat history + new user message to Gemini with the
 * conversational system prompt (no JSON constraint), unless the classifier
 * already supplied a clarification, in which case that text is returned
 * directly and no request is made.
 *
 * The API key travels in the `x-goog-api-key` header so it never appears in
 * the request URL (and therefore not in exception messages or logs).
 *
 * @param array       $history                Prior chat turns.
 * @param string      $userInput              The new user message.
 * @param string|null $hintFromClassification Optional clarification from the
 *                                            classification step; returned as-is when non-empty.
 * @return string|null Plain-text reply, or null when the call failed or
 *                     produced no usable text (callers supply a fallback).
 */
private function callGeminiConversational(
    array $history,
    string $userInput,
    ?string $hintFromClassification = null
): ?string {
    // If the classifier already produced a clarification, use it directly
    // and skip the second API call to save latency + quota.
    if (! empty($hintFromClassification)) {
        return $hintFromClassification;
    }

    $requestBody = [
        'system_instruction' => [
            'parts' => [['text' => $this->buildConversationalSystemPrompt()]],
        ],
        'contents' => $this->buildGeminiContents($history, $userInput),
        'generationConfig' => [
            'temperature' => 0.7, // more natural conversational tone
            'maxOutputTokens' => 400,
        ],
    ];

    try {
        $response = Http::withHeaders([
            'Content-Type' => 'application/json',
            'x-goog-api-key' => $this->apiKey,
        ])
            ->timeout(20)
            ->post($this->apiUrl, $requestBody);

        if (! $response->successful()) {
            Log::error('GeminiChatbotService: API error (conversational)', [
                'status' => $response->status(),
                'body' => $response->body(),
            ]);

            return null;
        }

        $text = $response->json('candidates.0.content.parts.0.text');

        // An empty or non-string reply must become null: the caller only
        // substitutes its fallback text for null, so '' would render as a
        // blank chat bubble.
        if (! is_string($text) || trim($text) === '') {
            Log::warning('GeminiChatbotService: empty reply (conversational)', [
                'finishReason' => $response->json('candidates.0.finishReason'),
            ]);

            return null;
        }

        return $text;
    } catch (\Exception $e) {
        Log::error('GeminiChatbotService: exception (conversational)', ['error' => $e->getMessage()]);

        return null;
    }
}
// ─────────────────────────────────────────────────────────────────────────
// Prompt builders
// ─────────────────────────────────────────────────────────────────────────
/**
 * Build the Phase 1 (classification) system prompt.
 *
 * The prompt lists the supported tasks with examples, describes the JSON
 * object the model must return, and embeds today's date and the first day of
 * the current month as the default production-report date range.
 *
 * @return string The full prompt text.
 */
private function buildSystemPrompt(): string
{
    $currentDate = now()->format('Y-m-d');
    $monthStart = now()->startOfMonth()->format('Y-m-d');

    return <<<PROMPT
You are a factory operations assistant that classifies plain-English user queries into structured tasks.
TASKS:
1. "invoice_status"
The user wants to check how many serial numbers in an invoice have been scanned / not scanned.
Required params: invoice_number
Examples:
- "check invoice 3RA0013333"
- "is invoice 3RA0013333 fully scanned?"
- "what's the scan status of 3RA0013333"
- "show me unscanned serials for invoice ABC123"
2. "invoice_report"
The user wants to know whether an item is a serial invoice or material invoice for a given plant.
Required params: item_code, plant_name
Examples:
- "check item 674071 for plant Vahinie Unit 2"
- "is item 500100 a serial or material invoice in Chennai plant?"
- "what type is item code 200300 at Vahinie?"
3. "production_report"
The user wants the production count for a plant and optional line over a date range.
Required params: plant_name
Optional params: line_name, date_from, date_to
Default dates: date_from = {$monthStart}, date_to = {$currentDate}
Interpret relative dates: "this month", "last week", "yesterday", "today", etc.
Examples:
- "show production for Chennai plant this month"
- "how many units were produced in line 1 of Vahinie Unit 2 last week?"
- "production report for all plants from 2024-01-01 to 2024-01-31"
4. "unknown"
The query cannot be clearly matched to any of the above tasks.
Use this when the user is asking something general, greeting, asking what the bot can do,
asking a follow-up question that doesn't map to a task, or if you need more context.
In this case, set "clarification" to a friendly, helpful response — it may be a question,
a helpful explanation, or guidance toward the supported tasks.
CONVERSATION CONTEXT:
Consider the full conversation history. If the previous assistant message asked a clarifying question
(e.g. "Do you mean Invoice Status or Invoice Report?") and the user is now answering that question,
classify accordingly based on the combined context.
OUTPUT FORMAT (return ONLY this JSON, no markdown fences, no extra text):
{
"task": "invoice_status | invoice_report | production_report | unknown",
"params": {
"invoice_number": "...",
"item_code": "...",
"plant_name": "...",
"line_name": "...",
"date_from": "YYYY-MM-DD",
"date_to": "YYYY-MM-DD"
},
"missing": ["list of required params that were not found in the user input"],
"clarification": "Friendly response for unknown tasks or missing-param prompts. Set to null when task is clear and params are complete."
}
RULES:
- Only include params relevant to the detected task.
- If a required param is missing, add it to "missing" and set a helpful "clarification" asking only for that missing value.
- If task is "unknown", always set "missing" to [] and put your full helpful response in "clarification".
- When task is clear and all required params are present, set "missing" to [] and "clarification" to null.
PROMPT;
}
/**
 * Build the Phase 2 (conversational fallback) system prompt.
 *
 * Plain-text instructions with no interpolated values: the assistant is told
 * which three tasks exist, what each requires, and to keep replies short.
 * Consumed by callGeminiConversational().
 *
 * @return string The full prompt text.
 */
private function buildConversationalSystemPrompt(): string
{
    // Nowdoc: the text contains nothing to interpolate.
    $prompt = <<<'PROMPT'
You are a helpful factory operations assistant integrated into an internal management panel.
You can perform these tasks when the user gives you enough information:
• Invoice Status — check how many serial numbers in an invoice have been scanned and list any unscanned ones. Requires: invoice number.
• Invoice Report — find out whether an item is a serial invoice or material invoice for a given plant. Requires: item code and plant name.
• Production Report — get the production count for a plant, optionally filtered by line and date range. Requires: plant name.
When the user's request does not match any of the above:
- Answer general questions helpfully and concisely.
- If you need more information to perform a task, ask for only the missing detail.
- Guide the user toward one of the supported tasks when relevant.
- Keep replies short and conversational (2-4 sentences max).
- Do NOT mention JSON, APIs, or technical internals.
PROMPT;

    return $prompt;
}
/**
 * Convert chat history + the new user message into Gemini's `contents` array.
 *
 * Gemini uses the role name "model" for the assistant side, so every history
 * entry whose role is not exactly "user" is sent as "model".
 *
 * History entries are validated before use: anything that is not an array
 * with a non-blank string `content` is skipped instead of triggering an
 * undefined-key error or sending an empty text part.
 *
 * @param array  $history   Previous turns: [['role' => 'user'|'assistant', 'content' => '…'], …]
 * @param string $userInput The new message; always appended last with role "user".
 * @param int    $maxTurns  How many of the most recent history entries to consider
 *                          (keeps the request within token limits). Values <= 0
 *                          send no history at all.
 * @return array<int, array{role: string, parts: array<int, array{text: string}>}>
 */
private function buildGeminiContents(array $history, string $userInput, int $maxTurns = 8): array
{
    $contents = [];

    // array_slice($history, -0) would return the WHOLE array, so guard zero.
    $recent = $maxTurns > 0 ? array_slice($history, -$maxTurns) : [];

    foreach ($recent as $msg) {
        $text = is_array($msg) ? ($msg['content'] ?? null) : null;
        if (! is_string($text) || trim($text) === '') {
            continue;
        }

        $contents[] = [
            'role' => ($msg['role'] ?? null) === 'user' ? 'user' : 'model',
            'parts' => [['text' => $text]],
        ];
    }

    $contents[] = [
        'role' => 'user',
        'parts' => [['text' => $userInput]],
    ];

    return $contents;
}
// ─────────────────────────────────────────────────────────────────────────
// Task handlers
// ─────────────────────────────────────────────────────────────────────────
/**
 * Invoice Status — forward the request to ChatbotService.
 *
 * All whitespace is removed from the supplied invoice number; the result is
 * passed to ChatbotService::ask() as the text "invoice = <number>". When no
 * invoice number is available the user is asked for one instead.
 *
 * @param array $params Classifier params; reads 'invoice_number'.
 * @return string Reply text for the chat bubble.
 */
private function handleInvoiceStatus(array $params): string
{
    $rawNumber = $params['invoice_number'] ?? '';
    $invoiceNumber = trim(preg_replace('/\s+/', '', $rawNumber));

    if (! empty($invoiceNumber)) {
        /** @var ChatbotService $chatbot */
        $chatbot = app(ChatbotService::class);

        return $chatbot->ask("invoice = {$invoiceNumber}");
    }

    return 'I need the invoice number to check the scan status. What is the invoice number?';
}
/**
 * Invoice Report — tell the user whether an item is a serial or material
 * invoice item for a given plant.
 *
 * The plant name is first resolved with resolvePlant()'s fuzzy matching; the
 * resolved plant name and the item code are then bound into a single CTE
 * query that checks the item exists for that plant and reads its
 * sticker_masters.material_type. (The original author notes the query mirrors
 * ChatBot::fetchInvoiceReport(), which is not visible from this file.)
 *
 * @param array $params Classifier params; reads 'item_code' and 'plant_name'.
 * @return string Reply text for the chat bubble.
 */
private function handleInvoiceReport(array $params): string
{
    $itemCode = trim($params['item_code'] ?? '');
    $plantName = trim($params['plant_name'] ?? '');

    // Guard clauses: both values are required before touching the database.
    if (empty($itemCode)) {
        return 'I need the item code to look up the invoice type. What is the item code?';
    }
    if (empty($plantName)) {
        return 'I need the plant name to look up the invoice type. Which plant are you asking about?';
    }

    $plant = $this->resolvePlant($plantName);
    if ($plant === null) {
        return "I couldn't find a plant matching \"{$plantName}\". "
            . 'Please check the plant name and try again.';
    }

    // Bindings, in placeholder order: plant name, item code.
    $bindings = [$plant->name, $itemCode];
    $sql = "
WITH plant_item AS (
SELECT ? AS user_plant,
? AS user_item_code
),
t1 AS (
SELECT
plants.id AS plant_id,
plants.name AS plant_name,
ARRAY_AGG(items.code) AS item_codes
FROM plants
LEFT JOIN items ON plants.id = items.plant_id
GROUP BY plants.id, plants.name
),
t2 AS (
SELECT
t1.plant_id,
t1.plant_name,
CASE
WHEN plant_item.user_item_code = ANY(t1.item_codes) THEN 1
ELSE 0
END AS exists_flag
FROM t1
CROSS JOIN plant_item
WHERE t1.plant_name = plant_item.user_plant
),
t3 AS (
SELECT t2.plant_id, t2.plant_name, t2.exists_flag,
plant_item.user_item_code
FROM t2
LEFT JOIN plant_item ON plant_item.user_plant = t2.plant_name
),
t4 AS (
SELECT items.id AS item_id,
t3.plant_id, t3.plant_name, t3.exists_flag, t3.user_item_code
FROM t3
LEFT JOIN items
ON t3.plant_id = items.plant_id
AND t3.user_item_code = items.code
)
SELECT
t4.item_id,
t4.plant_id,
t4.plant_name,
t4.exists_flag,
t4.user_item_code,
COALESCE(sticker_masters.material_type, 0) AS material_type,
CASE
WHEN sticker_masters.item_id IS NULL
THEN 'no match found'
WHEN COALESCE(sticker_masters.material_type, 0) = 0
THEN 'serial invoice'
ELSE 'material invoice'
END AS invoice_description
FROM t4
LEFT JOIN sticker_masters
ON sticker_masters.plant_id = t4.plant_id
AND sticker_masters.item_id = t4.item_id
";

    try {
        $resultSet = DB::select($sql, $bindings);
    } catch (\Exception $e) {
        $context = [
            'plant' => $plant->name,
            'item_code' => $itemCode,
            'error' => $e->getMessage(),
        ];
        Log::error('GeminiChatbotService: invoice report query failed', $context);

        return "Sorry, I couldn't fetch data. Please try again or contact support.";
    }

    if (empty($resultSet)) {
        return "No data found for plant \"{$plant->name}\". Please verify the plant name.";
    }

    // Only the first row is inspected.
    $first = $resultSet[0];
    $itemExists = (int) $first->exists_flag !== 0;
    if (! $itemExists) {
        return 'The provided item code does not exist in the item table.';
    }

    return match ($first->invoice_description) {
        'serial invoice' => 'It is a serial invoice item.',
        'material invoice' => 'It is a material invoice item.',
        'no match found' => "Item not found in sticker master for plant {$plant->name}.",
        default => 'Unexpected result. Please contact support.',
    };
}
/**
 * Production Report — count production_quantities rows for a plant (and
 * optionally one line) within a date range.
 *
 * Plant and line names are resolved with the fuzzy resolvers. The date range
 * defaults to "start of this month → today".
 *
 * Dates arrive from the LLM, so they are validated before use:
 *  - a missing, blank, non-string or unparseable value falls back to the default
 *    instead of reaching the SQL layer or an unguarded Carbon::parse();
 *  - a reversed range (from > to) is swapped rather than silently returning 0.
 *
 * All database access, including name resolution, is inside the try/catch so
 * a database failure yields the friendly error message rather than an
 * uncaught exception.
 *
 * @param array $params Classifier params; reads 'plant_name', 'line_name',
 *                      'date_from', 'date_to'.
 * @return string Reply text for the chat bubble.
 */
private function handleProductionReport(array $params): string
{
    $plantName = trim($params['plant_name'] ?? '');
    $lineName = trim($params['line_name'] ?? '');

    if (empty($plantName)) {
        return 'I need a plant name to fetch the production report. Which plant are you asking about?';
    }

    $dateFrom = $this->resolveDateParam($params['date_from'] ?? null, now()->startOfMonth());
    $dateTo = $this->resolveDateParam($params['date_to'] ?? null, now());
    if ($dateFrom->greaterThan($dateTo)) {
        [$dateFrom, $dateTo] = [$dateTo, $dateFrom];
    }

    // Labels used in log/error output; refined once the names are resolved.
    $plantLabel = $plantName;
    $lineLabel = 'All Lines';

    try {
        // ── Fuzzy-resolve the plant name ──────────────────────────────────
        $plant = $this->resolvePlant($plantName);
        if ($plant === null) {
            return "I couldn't find a plant matching \"{$plantName}\". "
                . 'Please check the plant name and try again.';
        }
        $plantLabel = $plant->name;

        // ── Base query ────────────────────────────────────────────────────
        $query = DB::table('production_quantities')
            ->whereNull('deleted_at')
            ->where('plant_id', $plant->id)
            ->whereDate('created_at', '>=', $dateFrom->format('Y-m-d'))
            ->whereDate('created_at', '<=', $dateTo->format('Y-m-d'));

        // ── Optionally filter by line ─────────────────────────────────────
        if (! empty($lineName)) {
            $line = $this->resolveLine($lineName, $plant->id);
            if ($line === null) {
                return "I couldn't find a line matching \"{$lineName}\" "
                    . "in plant \"{$plant->name}\". Please check the line name.";
            }
            $query->where('line_id', $line->id);
            $lineLabel = $line->name;
        }

        $count = $query->count();
    } catch (\Exception $e) {
        Log::error('GeminiChatbotService: production query failed', [
            'plant' => $plantLabel,
            'line' => $lineLabel,
            'error' => $e->getMessage(),
        ]);

        return "Sorry, I couldn't fetch production data for {$plantLabel}. "
            . 'Please try again or contact support.';
    }

    $from = $dateFrom->format('d M Y');
    $to = $dateTo->format('d M Y');

    return "📊 Production count for {$plantLabel} / {$lineLabel} "
        . "from {$from} to {$to}: {$count} records.";
}

/**
 * Turn a model-supplied date value into a Carbon instance, or use the fallback.
 *
 * @param mixed                   $value    Raw value from the classifier (expected 'YYYY-MM-DD').
 * @param \Carbon\CarbonInterface $fallback Date to use when $value is absent or unusable.
 * @return \Carbon\CarbonInterface
 */
private function resolveDateParam(mixed $value, \Carbon\CarbonInterface $fallback): \Carbon\CarbonInterface
{
    if (! is_string($value) || trim($value) === '') {
        return $fallback;
    }

    try {
        return \Carbon\Carbon::parse(trim($value));
    } catch (\Exception $e) {
        // Carbon throws on text it cannot interpret; treat that as "not provided".
        Log::warning('GeminiChatbotService: ignoring unparseable date', [
            'value' => $value,
            'error' => $e->getMessage(),
        ]);

        return $fallback;
    }
}
// ─────────────────────────────────────────────────────────────────────────
// Fuzzy name resolvers
// ─────────────────────────────────────────────────────────────────────────
/**
 * Resolve a user-supplied plant name to the best matching non-deleted plant.
 *
 * Loads id + name of every plant whose deleted_at is null and delegates the
 * matching to matchByName().
 *
 * @param string $userInput Plant name as typed by the user / extracted by the model.
 * @return object|null Row object with {id, name}, or null if nothing matches.
 */
private function resolvePlant(string $userInput): ?object
{
    $allPlants = DB::table('plants')
        ->whereNull('deleted_at')
        ->get(['id', 'name']);

    return $this->matchByName($allPlants, $userInput);
}

/**
 * Resolve a user-supplied line name within a specific plant.
 *
 * Loads id + name of every non-deleted line of the plant and applies the
 * same matching cascade as resolvePlant().
 *
 * @param string $userInput Line name as typed by the user / extracted by the model.
 * @param int    $plantId   Plant the line must belong to.
 * @return object|null Row object with {id, name}, or null if nothing matches.
 */
private function resolveLine(string $userInput, int $plantId): ?object
{
    $allLines = DB::table('lines')
        ->whereNull('deleted_at')
        ->where('plant_id', $plantId)
        ->get(['id', 'name']);

    return $this->matchByName($allLines, $userInput);
}

/**
 * Pick the row whose `name` best matches the user's text.
 *
 * Strategy cascade (stops at the first hit; ties go to the earliest row):
 *  1. Exact match after normalisation.
 *  2. One normalised string contains the other.
 *  3. Every significant user token occurs somewhere in the normalised row name.
 *  4. Highest count of shared significant tokens, requiring at least half of
 *     the user's tokens (minimum one) to be shared.
 *
 * Input that normalises to an empty string never matches: str_contains()
 * reports true for an empty needle, which would otherwise select the first
 * row for input such as "-" or ".". Rows whose name normalises to an empty
 * string are ignored for the same reason. Strategies 3 and 4 are skipped when
 * the user text yields no significant tokens, because "all of zero tokens
 * found" is vacuously true.
 *
 * @param iterable<object> $rows      Candidate rows, each exposing a `name` property.
 * @param string           $userInput Raw user-supplied name.
 * @return object|null The chosen row, or null when no strategy matches.
 */
private function matchByName(iterable $rows, string $userInput): ?object
{
    $norm = $this->normaliseForMatching($userInput);
    if ($norm === '') {
        return null;
    }

    // Normalise every candidate once instead of once per strategy.
    $candidates = [];
    foreach ($rows as $row) {
        $dbNorm = $this->normaliseForMatching((string) ($row->name ?? ''));
        if ($dbNorm !== '') {
            $candidates[] = [$row, $dbNorm];
        }
    }

    // ── Strategy 1: exact normalised match ────────────────────────────────
    foreach ($candidates as [$row, $dbNorm]) {
        if ($dbNorm === $norm) {
            return $row;
        }
    }

    // ── Strategy 2: one side contains the other ───────────────────────────
    foreach ($candidates as [$row, $dbNorm]) {
        if (str_contains($dbNorm, $norm) || str_contains($norm, $dbNorm)) {
            return $row;
        }
    }

    $userTokens = $this->significantTokens($norm);
    if ($userTokens === []) {
        return null;
    }

    // ── Strategy 3: all significant user tokens appear in the row name ────
    foreach ($candidates as [$row, $dbNorm]) {
        $allFound = true;
        foreach ($userTokens as $token) {
            if (! str_contains($dbNorm, $token)) {
                $allFound = false;
                break;
            }
        }
        if ($allFound) {
            return $row;
        }
    }

    // ── Strategy 4: best token-overlap score ──────────────────────────────
    // Require at least half the user tokens to match to avoid false positives.
    $threshold = max(1, (int) ceil(count($userTokens) / 2));
    $bestRow = null;
    $bestScore = 0;
    foreach ($candidates as [$row, $dbNorm]) {
        $shared = count(array_intersect($userTokens, $this->significantTokens($dbNorm)));
        if ($shared >= $threshold && $shared > $bestScore) {
            $bestScore = $shared;
            $bestRow = $row;
        }
    }

    return $bestRow;
}
/**
 * Reduce a plant/line name to a canonical form for fuzzy comparison.
 *
 * Steps, in order:
 *  1. Lowercase (ASCII, via strtolower).
 *  2. Turn '-', '_', '.' and ',' into spaces.
 *  3. Insert a space at every letter→digit and digit→letter boundary
 *     ("unit2" → "unit 2", "2unit" → "2 unit").
 *  4. Replace standalone Roman numerals i–viii with the digits 1–8. This is
 *     one-directional (Roman → digit) and happens after step 3 so that only
 *     whole tokens are affected — the "i" inside "industries" is untouched.
 *  5. Collapse runs of whitespace and trim.
 *
 * Both the user input and the stored name go through this function, so the
 * comparison is always like-for-like.
 *
 * @param string $value Raw name.
 * @return string Normalised name (may be empty).
 */
private function normaliseForMatching(string $value): string
{
    // Steps 1 + 2: lowercase, separators become spaces.
    $text = strtr(strtolower($value), [
        '-' => ' ',
        '_' => ' ',
        '.' => ' ',
        ',' => ' ',
    ]);

    // Step 3: both boundary patterns are applied in sequence.
    $text = preg_replace(['/([a-z])(\d)/', '/(\d)([a-z])/'], '$1 $2', $text);

    // Step 4: patterns run in array order — longer numerals before their
    // prefixes, with the lone "i" last.
    $romanPatterns = [
        '/\bviii\b/',
        '/\bvii\b/',
        '/\bvi\b/',
        '/\biv\b/',
        '/\biii\b/',
        '/\bii\b/',
        '/\bv\b/',
        '/\bi\b/',
    ];
    $romanDigits = ['8', '7', '6', '4', '3', '2', '5', '1'];
    $text = preg_replace($romanPatterns, $romanDigits, $text);

    // Step 5.
    return trim(preg_replace('/\s+/', ' ', $text));
}
/**
 * Split a normalised string into significant tokens.
 *
 * Dropped: stop words and single-character alphabetic tokens.
 * Kept: every purely numeric token, whatever its length. After normalisation
 * the unit/line number ("unit 2", "line 1", Roman "II" → "2") is often the
 * only thing that tells two names apart; discarding single digits made
 * "ransar unit 2" and "ransar unit 1" produce the same token list.
 *
 * @param string $normalised Output of normaliseForMatching().
 * @return array<int, string> Tokens in original order, re-indexed from 0.
 */
private function significantTokens(string $normalised): array
{
    $stopWords = ['and', 'the', 'of', 'for', 'at', 'in', 'a'];
    $tokens = explode(' ', $normalised);

    return array_values(array_filter($tokens, static function (string $t) use ($stopWords): bool {
        // Numbers are always significant, even a single digit.
        if (preg_match('/^\d+$/', $t) === 1) {
            return true;
        }

        return strlen($t) >= 2 && ! in_array($t, $stopWords, true);
    }));
}
// ─────────────────────────────────────────────────────────────────────────
/**
 * Unknown task — produce a conversational reply.
 *
 * Delegates to callGeminiConversational(), which returns the classifier's
 * clarification directly when one is supplied and otherwise queries Gemini
 * with the conversational prompt. If that yields null, a static help message
 * listing the supported tasks is returned instead.
 *
 * @param array       $chatHistory                 Prior chat turns.
 * @param string      $userInput                   The new user message.
 * @param string|null $clarificationFromClassifier Clarification text from Phase 1, if any.
 * @return string Reply text for the chat bubble.
 */
private function handleUnknown(
    array $chatHistory,
    string $userInput,
    ?string $clarificationFromClassifier
): string {
    $answer = $this->callGeminiConversational(
        $chatHistory,
        $userInput,
        $clarificationFromClassifier
    );

    if ($answer !== null) {
        return $answer;
    }

    // Static fallback when no reply could be obtained.
    $helpLines = [
        "I'm not sure I understood that. I can help you with:\n\n",
        "• Invoice Status — scan progress of an invoice\n",
        "• Invoice Report — serial vs material type for an item\n",
        "• Production Report — unit count for a plant / line\n\n",
        "What would you like to check?",
    ];

    return implode('', $helpLines);
}
}