from flask import Flask, request, jsonify, render_template
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sentence_transformers import SentenceTransformer
import re
from nltk.corpus import stopwords
import nltk
import pickle
import os

# Flask application object; the route decorators below register handlers on it.
app = Flask(__name__)

# Download NLTK stopwords
# NOTE: executed at import time, so nltk.download is called on every start-up.
nltk.download('stopwords')
# English stop words as a set; generate_embeddings passes it to preprocess_text.
stop_words = set(stopwords.words('english'))

# Load the SentenceTransformer model
# Module-level encoder that generate_embeddings uses to embed descriptions.
model = SentenceTransformer('all-MiniLM-L6-v2')


# Preprocess text
def preprocess_text(text, stop_words):
    """Normalise free text before it is embedded.

    Runs of non-word characters are collapsed into single spaces, the result
    is lower-cased, and every token found in ``stop_words`` is dropped.

    Args:
        text: Value to clean. Anything that is not a ``str`` yields ``""``.
        stop_words: Container of lower-case tokens to remove.

    Returns:
        The remaining tokens joined by single spaces.
    """
    if isinstance(text, str):
        normalized = re.sub(r'\W+', ' ', text).lower()
        kept_tokens = (token for token in normalized.split() if token not in stop_words)
        return ' '.join(kept_tokens)
    return ""


# Load the dataset
def load_data(file_path, encoding='latin1'):
    """Read the product CSV at ``file_path`` into a DataFrame.

    Args:
        file_path: Location of the CSV file, passed straight to
            ``pandas.read_csv``.
        encoding: Text encoding used to decode the file. Defaults to
            ``'latin1'``, the encoding this loader has always used.

    Returns:
        The parsed ``pandas.DataFrame``.

    Raises:
        Exception: If the file cannot be read or parsed. The message starts
            with ``"Failed to load CSV:"`` and the underlying error is kept
            as ``__cause__`` so the real traceback is not lost.
    """
    try:
        return pd.read_csv(file_path, encoding=encoding)
    except Exception as e:
        # Chain explicitly so logs show the pandas/OS error that triggered this.
        raise Exception(f"Failed to load CSV: {str(e)}") from e


# Generate embeddings for the dataset
def generate_embeddings(df):
    """Add cleaned descriptions and their sentence embeddings to ``df``.

    Two columns are written on the DataFrame that was passed in (it is
    modified in place and also returned):

    * ``processed_description`` - ``description`` run through
      ``preprocess_text`` with the module-level ``stop_words``.
    * ``embeddings`` - one vector per row produced by the module-level
      ``model``.

    Args:
        df: DataFrame that contains a ``description`` column.

    Returns:
        The same DataFrame, with the two columns added.

    Raises:
        KeyError: If ``df`` has no ``description`` column.
    """
    df['processed_description'] = df['description'].apply(preprocess_text, stop_words=stop_words)

    texts = df['processed_description'].tolist()
    # Encode all rows in one call so the encoder can batch them, instead of
    # invoking it once per row. Skip the call entirely for an empty frame.
    vectors = list(model.encode(texts)) if texts else []

    # Object dtype keeps one vector per cell, matching the per-row layout
    # that the similarity code reads back.
    df['embeddings'] = pd.Series(vectors, index=df.index, dtype=object)
    return df


# Load the model and data
def load_model_and_data():
    """Read the pickled model and DataFrame from the working directory.

    Returns:
        ``(model, df)`` as unpickled from ``sentence_transformer_model.pkl``
        and ``processed_data.pkl``, or ``(None, None)`` when either file is
        missing.
    """
    # NOTE: pickle executes code while loading; only read files this
    # application wrote itself.
    unpickled = []
    try:
        for pickle_path in ('sentence_transformer_model.pkl', 'processed_data.pkl'):
            with open(pickle_path, 'rb') as handle:
                unpickled.append(pickle.load(handle))
    except FileNotFoundError:
        return None, None
    cached_model, cached_df = unpickled
    return cached_model, cached_df


# Save the model and data
def save_model_and_data(model, df):
    """Pickle ``model`` and ``df`` into the working directory.

    Args:
        model: Object written to ``sentence_transformer_model.pkl``.
        df: Object written to ``processed_data.pkl``.
    """
    # The model file is written first, then the data file.
    targets = (
        ('sentence_transformer_model.pkl', model),
        ('processed_data.pkl', df),
    )
    for pickle_path, payload in targets:
        with open(pickle_path, 'wb') as handle:
            pickle.dump(payload, handle)


@app.route('/', methods=['GET'])
def recommend_input():
    """Serve the HTML page with the product-name input form.

    Returns:
        The rendered ``index.html`` template.
    """
    # Flask already resolves template names relative to the application's
    # template folder, so 'templates/index.html' only matches a nested
    # templates/templates/ directory. The original name is tried first to stay
    # compatible with such a layout, then the conventional 'index.html'.
    return render_template(['templates/index.html', 'index.html'])


def _load_or_build_catalog():
    """Return the product DataFrame with embeddings, building the cache on a miss."""
    # Only the cached DataFrame is needed here: embeddings are always produced
    # by the module-level ``model``, so the unpickled model is ignored.
    _, catalog = load_model_and_data()
    if catalog is None:
        file_path = 'C:\\Projects\\Product Recommender System\\products(des).csv'
        catalog = generate_embeddings(load_data(file_path))
        try:
            # Persist the module-level encoder, not a None placeholder from the
            # failed load; a None model made every later request look like a
            # cache miss and recompute all embeddings.
            save_model_and_data(model, catalog)
        except Exception:
            # The cache is an optimisation; failing to write it must not fail
            # a request whose data is already in memory.
            app.logger.exception("Could not persist the embedding cache")
    return catalog


def _rank_similar(df, product_name, limit=10):
    """Return up to ``limit`` most similar products, or None if the name is unknown."""
    is_query = (df['product_name'] == product_name).to_numpy(dtype=bool)
    if not is_query.any():
        return None

    embeddings = df['embeddings'].to_numpy()
    # Several rows may share the name; the first one supplies the query vector.
    query_vector = np.asarray(embeddings[is_query][0]).reshape(1, -1)

    # Every row whose name differs from the query is a candidate.
    candidate_positions = np.flatnonzero(~is_query)
    if candidate_positions.size == 0:
        return []

    # One vectorised similarity call instead of one call per row.
    candidate_matrix = np.vstack(list(embeddings[candidate_positions]))
    scores = cosine_similarity(query_vector, candidate_matrix)[0]

    # Stable sort on negated scores: highest first, ties keep dataset order.
    best = np.argsort(-scores, kind='stable')[:limit]
    # Positional access stays correct even if index labels are not unique.
    top_rows = df.iloc[candidate_positions[best]]
    return [
        {
            'product_name': name,
            'description': description,
            'similarity': float(score),
        }
        for name, description, score in zip(
            top_rows['product_name'], top_rows['description'], scores[best]
        )
    ]


@app.route('/recommend', methods=['POST'])
def recommend():
    """Return the ten products most similar to the requested one.

    Expects a JSON object body with a ``product_name`` key.

    Returns:
        * 200 and a JSON list of ``{"product_name", "description",
          "similarity"}`` objects, ordered by descending cosine similarity.
        * 400 and ``{"error": ...}`` when the body is not a JSON object or
          ``product_name`` is missing or empty.
        * 404 and ``{"error": ...}`` when no row has that product name.
        * 500 and ``{"error": ...}`` for any other failure (the exception is
          also written to the application log).
    """
    try:
        # silent=True yields None for a missing, malformed or non-JSON body,
        # so bad requests get the 400 below rather than a 500.
        payload = request.get_json(silent=True)
        product_name = payload.get('product_name') if isinstance(payload, dict) else None

        if not product_name:
            return jsonify({"error": "Product name is required."}), 400

        df = _load_or_build_catalog()

        top_recommendations = _rank_similar(df, product_name)
        if top_recommendations is None:
            return jsonify({"error": f"Product '{product_name}' not found."}), 404

        return jsonify(top_recommendations)

    except Exception as e:
        # Top-level boundary: keep the JSON error contract but record the
        # traceback so the failure can be diagnosed.
        app.logger.exception("Failed to compute recommendations")
        return jsonify({"error": str(e)}), 500


# Health check route
@app.route('/health', methods=['GET'])
def health():
    """Liveness probe: always answers with the JSON body ``{"status": "ok"}``."""
    status_payload = {"status": "ok"}
    return jsonify(status_payload)


# Start the Flask server only when this file is executed directly, not when
# it is imported.
# NOTE(review): debug=True turns on Flask's debug mode; confirm this entry
# point is used for local development only.
if __name__ == '__main__':
    app.run(debug=True)
