#include <algorithm>
#include <bit>
#include <bitset>
#include <chrono>
#include <cmath>
#include <cstdint>
#include <ctime>
#include <fstream>
#include <iomanip>
#include <iostream>
#include <iterator>
#include <limits>
#include <list>
#include <map>
#include <memory>
#include <mutex>
#include <numeric>
#include <set>
#include <sstream>
#include <string>
#include <thread>
#include <tuple>
#include <utility>
#include <vector>
#include "utilita.h"
#include "poset.h"
#include "functionLinearExtension.h"
#include "paramType.h"
#include "lossFunctionMRP.h"
#include "types.h"
#include "bitSet.h"
#include "matrice.h"
#include "dimensionalityReduction.h"


//************************************
//************************************
//************************************

/// Periodically refreshes the progress output until the caller clears `run`.
///
/// The loop always sleeps and displays at least once; `run` is only checked
/// after each refresh, so one final Display() happens after `run` becomes false.
///
/// @param displayMessage  object providing OutputSeconds() (refresh period) and Display().
/// @param run             flag owned by the caller; the loop ends once it reads false.
///
/// NOTE(review): `run` is a plain bool; when this function is used as a thread
/// entry point and the flag is written from another thread, the access is not
/// synchronised. Fixing that needs a signature change (e.g. std::atomic<bool>).
void RunDisplayMessage(std::shared_ptr<DisplayMessage> displayMessage, bool& run) {
    const auto periodo = std::chrono::milliseconds(displayMessage->OutputSeconds() * 1000);
    do {
        std::this_thread::sleep_for(periodo);
        displayMessage->Display();
    } while (run);
}


//************************************
//************************************
//************************************

/// Creates an empty result bound to a shared counter of evaluated linear extensions.
///
/// @param all_le_elaborate        counter incremented by AddLE(le, loss); stored by reference.
/// @param all_le_elaborate_mutex  mutex guarding that counter; stored by reference.
///
/// The best loss starts at +infinity and the best-permutation iterator at end(),
/// i.e. "no best permutation yet".
DimensionalityReductionResult::DimensionalityReductionResult(std::uint_fast64_t& all_le_elaborate,
                                                             std::mutex& all_le_elaborate_mutex)
    : __all_le_elaborate(all_le_elaborate),
      __all_le_elaborate_mutex(all_le_elaborate_mutex) {
    __loss_values.clear();
    __le_elaborate.clear();
    __best_loss = std::numeric_limits<double>::infinity();
    __best_permutation_iter = __le_elaborate.end();
}

/// Records one evaluated linear extension with its loss and bumps the shared counter.
///
/// @param le    the permutation that was evaluated (taken by value, then moved into storage).
/// @param loss  the loss obtained for `le`.
///
/// Warning: the two per-object lists are NOT protected by a lock, so a single
/// result object must not be filled from several threads at once. Only the
/// shared counter is updated under its mutex. The best loss is not updated
/// here; that happens when results are merged with AddLE(DimensionalityReductionResult&).
void DimensionalityReductionResult::AddLE(std::vector<std::uint_fast64_t> le, double loss) {
    // `le` is already a private copy, so hand its buffer over instead of copying again.
    __le_elaborate.push_back(std::move(le));
    __loss_values.push_back(loss);

    std::lock_guard<std::mutex> guard(__all_le_elaborate_mutex);
    ++__all_le_elaborate;
}

std::list<std::vector<std::uint_fast64_t>>& DimensionalityReductionResult::LEElaborate() {
    return __le_elaborate;
}

std::list<double>& DimensionalityReductionResult::LossValues() {
    return __loss_values;
}

double DimensionalityReductionResult::BestLossValue() {
    return __best_loss;
}

std::vector<std::uint_fast64_t>& DimensionalityReductionResult::BestLE() {
    return __best_permutation;
}

std::map<std::uint_fast64_t, std::tuple<std::uint_fast64_t, std::uint_fast64_t, double, double>>&
DimensionalityReductionResult::BestProfileResults() {
    return __best_profile_results;
}

/// Merges every (linear extension, loss) pair of `r` into this result and keeps
/// track of the pair with the smallest loss.
///
/// @param r  result to merge; it is only read. Its two lists are walked in
///           lockstep, bounded by the list of linear extensions.
///
/// The best-permutation iterator is made to point at the copy stored in THIS
/// object's list, so it remains valid after `r` is destroyed (std::list
/// iterators are not invalidated by later insertions).
void DimensionalityReductionResult::AddLE(DimensionalityReductionResult& r) {
    auto le_iter = r.__le_elaborate.begin();
    auto loss_iter = r.__loss_values.begin();
    for (; le_iter != r.__le_elaborate.end(); ++le_iter, ++loss_iter) {
        // insert() returns the iterator to the freshly stored copy.
        const auto inserito = __le_elaborate.insert(__le_elaborate.end(), *le_iter);
        __loss_values.push_back(*loss_iter);
        if (__best_loss > *loss_iter) {
            __best_loss = *loss_iter;
            __best_permutation_iter = inserito;
        }
    }
}


void DimensionalityReductionResult::BuildBestProfileResults(std::shared_ptr<std::map<std::uint_fast64_t, double>>& weights,
                                                  LossFunctionMRPV2& lfmrp,
                                                  std::vector<std::uint_fast64_t>& elements_used,
                                                  Matrice<double>& mrp_rf,
                                                  std::list<std::shared_ptr<std::vector<std::uint_fast64_t>>>& rows_function,
                                                  std::list<std::shared_ptr<std::vector<std::uint_fast64_t>>>& cols_function) {
    auto le = std::make_shared<std::vector<std::uint_fast64_t>>();
    auto le_inv = std::make_shared<std::vector<std::uint_fast64_t>>();

    __best_permutation.assign(__best_permutation_iter->begin(), __best_permutation_iter->end());
    std::vector<std::uint_fast64_t> __best_permutation_inv(__best_permutation.size());
    for (std::uint_fast64_t p = 0; p < __best_permutation.size(); p++) {
        __best_permutation_inv[p] = __best_permutation[__best_permutation.size() - p - 1];

    }
    auto numero_variabili = __best_permutation.size();
    DimensionalityReductionBuildLE(__best_permutation.size(), __best_permutation, __best_permutation_inv, elements_used, *le, *le_inv);

    rows_function.push_front(le);
    cols_function.push_front(le_inv);
    std::list<std::pair<std::uint_fast64_t, double>> result_error;
    lfmrp(mrp_rf, rows_function, cols_function, result_error);
    rows_function.pop_front();
    cols_function.pop_front();


    auto posizione = [numero_variabili] (std::vector<std::uint_fast64_t>& permutazione,
                                         std::vector<std::uint_fast64_t>& permutazione_inv,
                                         std::uint_fast64_t profilo) {

        //std::cout << "\t" << profilo << " (" << std::bitset<6>(profilo) << "): " << std::endl;

        std::uint_fast64_t pos_lex = 0;
        std::uint_fast64_t pos_lex_inv = 0;
        for (std::uint_fast64_t i = 0; i < numero_variabili; ++i) {
            std::uint_fast64_t variabile = permutazione[i];
            std::uint_fast64_t valore_variabile = (profilo >> (numero_variabili - variabile - 1)) & ((std::uint_fast64_t) 1);
            std::uint_fast64_t nuovo_valore = valore_variabile << (numero_variabili - i - 1);
            //std::cout << "\t\t" << variabile << " -> " << valore_variabile << " ( >> " << (numero_variabili - variabile - 1) << ") : " << nuovo_valore << " ( << "<< (numero_variabili - i - 1) << ") : "  << std::endl;

            pos_lex = pos_lex | nuovo_valore;

            std::uint_fast64_t variabile_inv = permutazione_inv[i];
            std::uint_fast64_t valore_variabile_inv = (profilo >> (numero_variabili - variabile_inv - 1)) & ((std::uint_fast64_t) 1);
            std::uint_fast64_t nuovo_valore_inv = valore_variabile_inv << (numero_variabili - i - 1);
            pos_lex_inv = pos_lex_inv | nuovo_valore_inv;
        }
        //std::cout << "\t" <<  pos_lex << " (" << std::bitset<6>(pos_lex) << ")" << std::endl;

        return std::make_pair(pos_lex, pos_lex_inv);
    };


    std::ostringstream oss;
    std::copy(__best_permutation.begin(), __best_permutation.end()-1, std::ostream_iterator<int>(oss, ","));
    oss << __best_permutation.back();

    //std::cout << "numero_variabili: " << numero_variabili << std::endl;
    //std::cout << "permutazione: " << oss.str() << std::endl;


    for (auto& l : result_error) {
        auto pos = posizione(__best_permutation, __best_permutation_inv, l.first);
        std::uint_fast64_t x = pos.first + 1;
        std::uint_fast64_t y = pos.second + 1;
        double peso = weights->at(l.first);
        auto errore = l.second;
        __best_profile_results[l.first] = {x, y, peso, errore};
    }
    return;
}


//************************************
//************************************
//************************************

/// Returns 2^(cardinality - popcount(n)) - 1.
///
/// With profiles ordered by bit inclusion this is presumably the number of
/// profiles strictly above `n` (one per non-empty subset of its zero bits).
///
/// @param n            profile encoded as a bit mask.
/// @param cardinality  number of variables; expected to be >= popcount(n).
std::uint_fast64_t npup(std::uint_fast64_t n, std::uint_fast64_t cardinality) {
    const auto bit_a_uno = static_cast<std::uint_fast64_t>(std::popcount(n));
    const std::uint_fast64_t bit_a_zero = cardinality - bit_a_uno;
    return (std::uint_fast64_t{1} << bit_a_zero) - 1;
}

//************************************
//************************************
//************************************

/// Returns 2^popcount(n) - 1.
///
/// With profiles ordered by bit inclusion this is presumably the number of
/// profiles strictly below `n` (one per proper subset of its one bits).
///
/// @param n            profile encoded as a bit mask.
/// @param cardinality  unused; kept so the signature matches npup and both can
///                     be passed through the same function-pointer type.
std::uint_fast64_t npdown(std::uint_fast64_t n, std::uint_fast64_t cardinality) {
    const int bit_a_uno = std::popcount(n);
    return (std::uint_fast64_t{1} << bit_a_uno) - 1;
}

//************************************
//************************************
//************************************

/// Ratio (1 + npup(n)) / (1 + npdown(n)) for the profile `n`.
///
/// @param n            profile encoded as a bit mask.
/// @param cardinality  forwarded unchanged to both callbacks.
/// @param npup         callback giving the "up" count of a profile.
/// @param npdown       callback giving the "down" count of a profile.
/// @return the ratio as a double; always positive because both terms are >= 1.
double qmsi(std::uint_fast64_t n, 
            std::uint_fast64_t cardinality,
            std::uint_fast64_t (*npup)(std::uint_fast64_t, std::uint_fast64_t),
            std::uint_fast64_t (*npdown)(std::uint_fast64_t, std::uint_fast64_t)) {
    const double sopra = 1.0 + npup(n, cardinality);
    const double sotto = 1.0 + npdown(n, cardinality);
    return sopra / sotto;
}

//************************************
//************************************
//************************************

/// Returns q(i) / (q(i) + q(j)), where q is the ratio computed by qmsi.
///
/// The result lies in (0, 1) and msi(i, j) + msi(j, i) == 1 up to rounding.
///
/// @param i            first profile.
/// @param j            second profile.
/// @param cardinality  forwarded to qmsi.
/// @param npup         "up" count callback, forwarded to qmsi.
/// @param npdown       "down" count callback, forwarded to qmsi.
double msi(std::uint_fast64_t i, 
           std::uint_fast64_t j,
           std::uint_fast64_t cardinality,
           std::uint_fast64_t (*npup)(std::uint_fast64_t, std::uint_fast64_t),
           std::uint_fast64_t (*npdown)(std::uint_fast64_t, std::uint_fast64_t)) {
    const double quota_i = qmsi(i, cardinality, npup, npdown);
    const double quota_j = qmsi(j, cardinality, npup, npdown);
    const double totale = quota_i + quota_j;
    return quota_i / totale;
}

//************************************
//************************************
//************************************

std::uint_fast64_t rmost_zero(std::uint_fast64_t n) {
    std::uint_fast64_t x = ~n;
    std::uint_fast64_t r = x & ~(x-1);
    return (std::uint_fast64_t) std::countr_zero(r);
};

//************************************
//************************************
//************************************

/// Writes into `ris`, in increasing order, the positions of the zero bits found
/// in the lowest `cardinality` bits of `n`.
///
/// @param n            value to scan (taken by value, consumed while scanning).
/// @param cardinality  number of low bits to inspect.
/// @param ris          output buffer; must hold at least `cardinality` elements
///                     because it is indexed, not grown.
/// @return how many positions were written (as std::uint_fast64_t).
auto zero_positions(std::uint_fast64_t n, std::uint_fast64_t cardinality, std::vector<std::uint_fast64_t>& ris) {
    std::uint_fast64_t trovati = 0;
    std::uint_fast64_t residui = cardinality;   // bits still to be inspected

    while (residui != 0) {
        const std::uint_fast64_t salto = rmost_zero(n);
        if (salto >= residui) {
            break;                              // no further zero inside the window
        }
        // `salto` is relative to the already-shifted value; add the bits consumed so far.
        ris[trovati] = salto + cardinality - residui;
        ++trovati;
        n >>= (salto + 1);
        residui -= (salto + 1);
    }
    return trovati;
}

//************************************
//************************************
//************************************

/// Computes k + sum over pairs a < b of 2^(z[b] - z[a] - 1), where z[0..k-1] are
/// the positions of the zero bits of `n` within its lowest `cardinality` bits
/// (as returned by zero_positions).
///
/// @param n            profile encoded as a bit mask.
/// @param cardinality  number of low bits of `n` that are meaningful.
std::uint_fast64_t npup_int(std::uint_fast64_t n, std::uint_fast64_t cardinality) {
    std::vector<std::uint_fast64_t> posizioni(cardinality);
    const auto trovati = zero_positions(n, cardinality, posizioni);

    std::uint_fast64_t totale = trovati;
    for (std::uint_fast64_t a = 0; a < trovati; ++a) {
        for (std::uint_fast64_t b = a + 1; b < trovati; ++b) {
            const std::uint_fast64_t distanza = posizioni[b] - posizioni[a] - 1;
            totale += std::uint_fast64_t{1} << distanza;
        }
    }
    return totale;
}

//************************************
//************************************
//************************************

/// Same computation as npup_int, applied to the bitwise complement of `n`
/// (so the one bits of `n` play the role of the zero bits).
///
/// @param n            profile encoded as a bit mask.
/// @param cardinality  number of low bits of `n` that are meaningful.
std::uint_fast64_t npdown_int(std::uint_fast64_t n, std::uint_fast64_t cardinality) {
    const std::uint_fast64_t complemento = ~n;
    return npup_int(complemento, cardinality);
}

//************************************
//************************************
//************************************

/// Rearranges the bits of `n` according to `permutazione`.
///
/// Variable v is stored at bit (size - v - 1) of `n`. The result is built from
/// the most significant bit down: its i-th bit from the top (within `size`
/// bits) is the bit of variable permutazione[i].
///
/// @param n             profile encoded as a bit mask.
/// @param permutazione  variable order; every entry must be < permutazione.size().
/// @return the permuted bit mask; 0 for an empty permutation (the original
///         code read permutazione[0] unconditionally).
std::uint_fast64_t permuta(std::uint_fast64_t n, std::vector<std::uint_fast64_t>& permutazione) {
    const std::uint_fast64_t lunghezza = permutazione.size();
    std::uint_fast64_t ris = 0;
    for (const std::uint_fast64_t variabile : permutazione) {
        const std::uint_fast64_t pos = lunghezza - variabile - 1;
        const std::uint_fast64_t bit = (n >> pos) & std::uint_fast64_t{1};
        ris = (ris << 1) | bit;
    }
    return ris;
}


void BidimentionalPosetRepresentation(std::uint_fast64_t numero_variabili,
                                      std::shared_ptr<std::map<std::uint_fast64_t, double>> weights,
                                      std::string loss_str,
                                      std::vector<std::uint_fast64_t>& variable_priority,
                                      DimensionalityReductionResult& result) {
    
    std::uint_fast64_t numero_profili = ((std::uint_fast64_t) 1 << numero_variabili);
    
    std::vector<std::uint_fast64_t> elements_used(numero_profili);
    std::iota(elements_used.begin(), elements_used.end(), 0);
    
    auto mrp_start_poset = std::make_shared<Matrice<double>>(numero_profili, numero_profili, std::numeric_limits<double>::quiet_NaN());

    for (std::uint_fast64_t re = 0; re < elements_used.size(); ++re) {
        auto row = elements_used.at(re);
        (*mrp_start_poset)(row, row) = 1.0;
        for (std::uint_fast64_t ce = re + 1; ce < elements_used.size(); ++ce) {
            auto col = elements_used.at(ce);
            std::uint_fast64_t r = row & col;
            if (r == row) {
                (*mrp_start_poset)(row, col) = 1.0;
                (*mrp_start_poset)(col, row) = 0.0;
            } else if (r == col) {
                (*mrp_start_poset)(row, col) = 0.0;
                (*mrp_start_poset)(col, row) = 1.0;
            } else {
                auto v = msi(row, col, numero_variabili, npup, npdown);
                (*mrp_start_poset)(row, col) = v;
                (*mrp_start_poset)(col, row) = 1.0 - v;
            }
        }
    }
    
    std::vector<std::uint_fast64_t> numbers(numero_variabili);
    for (std::uint_fast64_t p = 0; p < numero_variabili; ++p) {
        numbers[p] = ((std::uint_fast64_t) 1) << p;
    }
    
    std::shared_ptr<LossFunctionMRPV2> lfmrp = nullptr;
    if (loss_str == "LB") {
        lfmrp = std::make_shared<LBMRP2>(std::make_shared<std::vector<std::shared_ptr<Matrice<double>>>>(1, mrp_start_poset), weights);
    } else {
        std::string err_str = "Loss function error!: " + loss_str;
        throw_line(err_str);
    }
    
    Matrice<double> mrp_rf(numero_profili, numero_profili, std::numeric_limits<double>::quiet_NaN());
    
    auto le_rf = std::make_shared<std::vector<std::uint_fast64_t>>();
    auto le_inv_rf = std::make_shared<std::vector<std::uint_fast64_t>>();
    std::vector<std::uint_fast64_t> permutazione_rf(numero_variabili);
    std::vector<std::uint_fast64_t> permutazione_inv_rf(numero_variabili);
    
    for (std::uint_fast64_t i = 0; i < numero_variabili; ++i) {
        permutazione_rf[i] = i;
        permutazione_inv_rf[i] = numero_variabili - i - 1;
    }
    DimensionalityReductionBuildLE(numero_variabili, permutazione_rf, permutazione_inv_rf, elements_used, *le_rf, *le_inv_rf);
    DimensionalityReductionBuildMRPIntersection(permutazione_rf, *le_rf, *le_inv_rf, elements_used, mrp_rf);
    
    std::list<std::shared_ptr<std::vector<std::uint_fast64_t>>> rows_function;
    std::list<std::shared_ptr<std::vector<std::uint_fast64_t>>> cols_function;
    rows_function.push_back(le_rf);
    cols_function.push_back(le_inv_rf);
    
    auto le = std::make_shared<std::vector<std::uint_fast64_t>>();
    auto le_inv = std::make_shared<std::vector<std::uint_fast64_t>>();
    
    std::vector<std::uint_fast64_t> variable_priority_inv(variable_priority.size());
    for (std::uint_fast64_t p = 0; p < variable_priority.size(); p++) {
        variable_priority_inv[p] = variable_priority[variable_priority.size() - p - 1];
        
    }
    
    DimensionalityReductionBuildLE(numero_variabili, variable_priority, variable_priority_inv, elements_used, *le, *le_inv);
    
    rows_function.push_front(le);
    cols_function.push_front(le_inv);
   
    DimensionalityReductionResult local_result(result.__all_le_elaborate, result.__all_le_elaborate_mutex);

    double loss_value = (*lfmrp)(mrp_rf, rows_function, cols_function);
    local_result.AddLE(variable_priority, loss_value);
    rows_function.pop_front();
    cols_function.pop_front();
    
    result.AddLE(local_result);
    
    result.BuildBestProfileResults(weights, *lfmrp, elements_used, mrp_rf, rows_function, cols_function);
    
 }


//************************************
//************************************
//************************************

void DimensionalityReduction(std::uint_fast64_t numero_variabili,
                             std::shared_ptr<std::map<std::uint_fast64_t, double>> weights,
                             std::string loss_str,
                             std::shared_ptr<DisplayMessage> displayMessage,
                             double thread_percentage,
                             DimensionalityReductionResult& result) {
    
    std::uint_fast64_t numero_profili = ((std::uint_fast64_t) 1 << numero_variabili);
    
    std::vector<std::uint_fast64_t> elements_used(numero_profili);
    std::iota(elements_used.begin(), elements_used.end(), 0);
    
    auto mrp_start_poset = std::make_shared<Matrice<double>>(numero_profili, numero_profili, std::numeric_limits<double>::quiet_NaN());

    for (std::uint_fast64_t re = 0; re < elements_used.size(); ++re) {
        auto row = elements_used.at(re);
        (*mrp_start_poset)(row, row) = 1.0;
        for (std::uint_fast64_t ce = re + 1; ce < elements_used.size(); ++ce) {
            auto col = elements_used.at(ce);
            std::uint_fast64_t r = row & col;
            if (r == row) {
                (*mrp_start_poset)(row, col) = 1.0;
                (*mrp_start_poset)(col, row) = 0.0;
            } else if (r == col) {
                (*mrp_start_poset)(row, col) = 0.0;
                (*mrp_start_poset)(col, row) = 1.0;
            } else {
                auto v = msi(row, col, numero_variabili, npup, npdown);
                (*mrp_start_poset)(row, col) = v;
                (*mrp_start_poset)(col, row) = 1.0 - v;
            }
        }
    }
    
    std::vector<std::uint_fast64_t> numbers(numero_variabili);
    for (std::uint_fast64_t p = 0; p < numero_variabili; ++p) {
        numbers[p] = ((std::uint_fast64_t) 1) << p;
    }
    
    std::shared_ptr<LossFunctionMRPV2> lfmrp = nullptr;
    if (loss_str == "LB") {
        lfmrp = std::make_shared<LBMRP2>(std::make_shared<std::vector<std::shared_ptr<Matrice<double>>>>(1, mrp_start_poset), weights);
    } else {
        std::string err_str = "Loss function error!: " + loss_str;
        throw_line(err_str);
    }
    
    auto total_number_le = std::make_shared<std::uint_fast64_t>(0);

    ExactDimensionalityReductionThreads(weights, numero_variabili, *lfmrp, elements_used, displayMessage, thread_percentage, result);
    return;
 }

//************************************
//************************************
//************************************

void ExactDimensionalityReductionThreads(std::shared_ptr<std::map<std::uint_fast64_t, double>> weights,
                                         std::uint_fast64_t numero_variabili,
                                         LossFunctionMRPV2& lfmrp,
                                         std::vector<std::uint_fast64_t>& elements_used,
                                         std::shared_ptr<DisplayMessage> displayMessage,
                                         double thread_percentage,
                                         DimensionalityReductionResult& result) {
    
    auto start_time = std::clock();
    auto t_start = std::chrono::high_resolution_clock::now();
    
    std::uint_fast64_t numero_profili = ((std::uint_fast64_t) 1 << numero_variabili);
    std::uint_fast64_t numero_thread = std::max((std::uint_fast64_t) 1, (std::uint_fast64_t) std::floor(std::thread::hardware_concurrency() * thread_percentage));
    
    std::uint_fast64_t numero_le_elaborate = 0;
    
    std::uint_fast64_t coppie_totali = (numero_variabili * (numero_variabili - 1)) / 2;
    std::uint_fast64_t coppie_da_elaborare_per_thread = std::ceil(coppie_totali / ((double) numero_thread));
    std::uint64_t permutations_x_pair = std::tgamma(numero_variabili - 1);
    std::uint64_t total_le = permutations_x_pair * coppie_totali;
    
    
    bool display_message_run = true;
    std::thread output_thread(RunDisplayMessage, displayMessage, std::ref(display_message_run));
    
    
    displayMessage->Display("Variables: " + std::to_string(numero_variabili));
    displayMessage->Display("Profiles: " + std::to_string(weights->size()));
    displayMessage->Display("Linear Extentions: " + std::to_string(total_le));
    displayMessage->Display("Supported concurrent threads: " + std::to_string(std::thread::hardware_concurrency()));
    displayMessage->Display("Used threads: " + std::to_string(numero_thread));
    displayMessage->Display("Number of threads: " + std::to_string((numero_thread <= coppie_totali ? numero_thread : coppie_totali)));
    displayMessage->Start();
    
    Matrice<double> mrp_rf(numero_profili, numero_profili, std::numeric_limits<double>::quiet_NaN());
    
    auto le_rf = std::make_shared<std::vector<std::uint_fast64_t>>();
    auto le_inv_rf = std::make_shared<std::vector<std::uint_fast64_t>>();
    std::vector<std::uint_fast64_t> permutazione_rf(numero_variabili);
    std::vector<std::uint_fast64_t> permutazione_inv_rf(numero_variabili);
    
    
    for (std::uint_fast64_t i = 0; i < numero_variabili; ++i) {
        permutazione_rf[i] = i;
        permutazione_inv_rf[i] = numero_variabili - i - 1;
    }
    DimensionalityReductionBuildLE(numero_variabili, permutazione_rf, permutazione_inv_rf, elements_used, *le_rf, *le_inv_rf);
    DimensionalityReductionBuildMRPIntersection(permutazione_rf, *le_rf, *le_inv_rf, elements_used, mrp_rf);
    
    std::vector<std::shared_ptr<std::thread>> threads(numero_thread, nullptr);
    std::vector<std::shared_ptr<std::list<std::shared_ptr<std::vector<std::uint_fast64_t>>>>> threads_rows_function(numero_thread, nullptr);
    std::vector<std::shared_ptr<std::list<std::shared_ptr<std::vector<std::uint_fast64_t>>>>> threads_cols_function(numero_thread, nullptr);
    std::vector<std::shared_ptr<DimensionalityReductionResult>> threads_results(numero_thread, nullptr);
    
    std::uint_fast64_t first_value = 0;
    std::uint_fast64_t second_value = 1;
    std::uint_fast64_t conta = 0;
    std::uint_fast64_t thread_creati = 0;
    
    for (std::uint_fast64_t v1 = 0; v1 < numero_variabili - 1; ++v1) {
        for (std::uint_fast64_t v2 = v1 + 1; v2 < numero_variabili; ++v2) {
            //std::cout << v1 << ", " << v2 << std::endl;
            ++conta;
            if (conta >= coppie_da_elaborare_per_thread) {
                //displayMessage->Display("avvio thread con: " + std::to_string(first_value) + ", " + std::to_string(second_value));
                
                conta = 0;
                threads_rows_function.at(thread_creati) = std::make_shared<std::list<std::shared_ptr<std::vector<std::uint_fast64_t>>>>();
                threads_cols_function.at(thread_creati) = std::make_shared<std::list<std::shared_ptr<std::vector<std::uint_fast64_t>>>>();
                threads_rows_function.at(thread_creati)->push_back(le_rf);
                threads_cols_function.at(thread_creati)->push_back(le_inv_rf);
                threads_results.at(thread_creati) = std::make_shared<DimensionalityReductionResult>(result.__all_le_elaborate, result.__all_le_elaborate_mutex);
                
                auto td = std::make_shared<std::thread>(ExactDimensionalityReductionSingleThread,
                                                        first_value,
                                                        second_value,
                                                        coppie_da_elaborare_per_thread,
                                                        std::ref(numero_le_elaborate),
                                                        numero_variabili,
                                                        std::ref(lfmrp),
                                                        std::ref(mrp_rf),
                                                        threads_rows_function.at(thread_creati),
                                                        threads_cols_function.at(thread_creati),
                                                        std::ref(elements_used),
                                                        std::ref(threads_results.at(thread_creati)));
                threads.at(thread_creati) = td;
                ++thread_creati;
                
                first_value = v1;
                second_value = v2 + 1;
                if (second_value >= numero_variabili) {
                    ++first_value;
                    second_value = first_value + 1;
                }
            }
        }
    }
    
    if (conta != 0) {
        //displayMessage->Display("avvio thread con finale: " + std::to_string(first_value) + ", " + std::to_string(second_value));
        
        threads_rows_function.at(thread_creati) = std::make_shared<std::list<std::shared_ptr<std::vector<std::uint_fast64_t>>>>();
        threads_cols_function.at(thread_creati) = std::make_shared<std::list<std::shared_ptr<std::vector<std::uint_fast64_t>>>>();
        threads_rows_function.at(thread_creati)->push_back(le_rf);
        threads_cols_function.at(thread_creati)->push_back(le_inv_rf);
        threads_results.at(thread_creati) = std::make_shared<DimensionalityReductionResult>(result.__all_le_elaborate, result.__all_le_elaborate_mutex);
        
        auto td = std::make_shared<std::thread>(ExactDimensionalityReductionSingleThread,
                                                first_value,
                                                second_value,
                                                coppie_da_elaborare_per_thread,
                                                std::ref(numero_le_elaborate),
                                                numero_variabili,
                                                std::ref(lfmrp),
                                                std::ref(mrp_rf),
                                                threads_rows_function.at(thread_creati),
                                                threads_cols_function.at(thread_creati),
                                                std::ref(elements_used),
                                                threads_results.at(thread_creati));
        
        threads.at(thread_creati) = td;
        ++thread_creati;
    }
    
    for (std::uint_fast64_t p = 0; p < thread_creati; ++p) {
        threads.at(p)->join();
        result.AddLE(*threads_results.at(p));
    }
    
    std::list<std::shared_ptr<std::vector<std::uint_fast64_t>>> rows_function;
    std::list<std::shared_ptr<std::vector<std::uint_fast64_t>>> cols_function;
    rows_function.push_back(le_rf);
    cols_function.push_back(le_inv_rf);
    
    result.BuildBestProfileResults(weights, lfmrp, elements_used, mrp_rf, rows_function, cols_function);
    
    displayMessage->Stop();
    
    display_message_run = false;
    output_thread.join();

    auto end_time = std::clock();
    auto t_end = std::chrono::high_resolution_clock::now();
    
    std::uint_fast64_t milliseconds = std::max((std::uint_fast64_t) (1000.0 * (((double) end_time) - ((double) start_time)) / CLOCKS_PER_SEC), (std::uint_fast64_t) 0);
    std::uint_fast64_t milliseconds_wall = std::chrono::duration<double, std::milli>(t_end - t_start).count();
    
    
    displayMessage->Display("Tempo totale (CPU): " + std::to_string(milliseconds / 1000.0) + "s");
    displayMessage->Display("Tempo totale (Wall): " + std::to_string(milliseconds_wall / 1000.0) + "s");

    return;
}

//************************************
//************************************
//************************************

/// Worker body: processes up to `coppie_da_elaborare` consecutive pairs of
/// variables, starting at (first_value, second_value) and continuing in
/// lexicographic order (first the remaining partners of `first_value`, then the
/// pairs of the following first variables).
///
/// At least one pair is attempted in the first phase whenever
/// second_value < numero_variabili, even if the quota is 0.
///
/// @param first_value          first variable of the starting pair.
/// @param second_value         second variable of the starting pair.
/// @param coppie_da_elaborare  maximum number of pairs to process.
/// @param numero_le_elaborate  counter forwarded by reference to the per-pair routine.
/// @param numero_variabili     number of binary variables.
/// @param lfmrp                loss functor, forwarded.
/// @param mrp_rf               reference matrix, forwarded.
/// @param rows_function        this worker's row list, forwarded dereferenced.
/// @param cols_function        this worker's column list, forwarded dereferenced.
/// @param elements_used        profile list, forwarded.
/// @param result               this worker's result object, forwarded.
void ExactDimensionalityReductionSingleThread(std::uint_fast64_t first_value, 
                                              std::uint_fast64_t second_value,
                                              std::uint_fast64_t coppie_da_elaborare,
                                              std::uint_fast64_t& numero_le_elaborate,
                                              std::uint_fast64_t numero_variabili,
                                              LossFunctionMRPV2& lfmrp,
                                              Matrice<double>& mrp_rf,
                                              std::shared_ptr<std::list<std::shared_ptr<std::vector<std::uint_fast64_t>>>> rows_function,
                                              std::shared_ptr<std::list<std::shared_ptr<std::vector<std::uint_fast64_t>>>> cols_function,
                                              std::vector<std::uint_fast64_t>& elements_used,
                                              std::shared_ptr<DimensionalityReductionResult> result) {

    std::uint_fast64_t coppie_fatte = 0;

    // Processes one pair and reports whether the quota has been reached.
    const auto elabora_coppia = [&](std::uint_fast64_t basso, std::uint_fast64_t alto) -> bool {
        DimensionalityReductionElaboraGruppoPermutazioni(basso,
                                                         alto,
                                                         numero_variabili,
                                                         elements_used,
                                                         lfmrp,
                                                         mrp_rf,
                                                         *rows_function,
                                                         *cols_function,
                                                         numero_le_elaborate,
                                                         result);
        ++coppie_fatte;
        return coppie_fatte >= coppie_da_elaborare;
    };

    // Phase 1: remaining partners of the starting first variable.
    for (std::uint_fast64_t v2 = second_value; v2 < numero_variabili; ++v2) {
        if (elabora_coppia(first_value, v2)) {
            return;
        }
    }

    // Also covers a zero quota when phase 1 had nothing to do.
    if (coppie_fatte >= coppie_da_elaborare) {
        return;
    }

    // Phase 2: pairs of the following first variables.
    for (std::uint_fast64_t v1 = first_value + 1; v1 < numero_variabili - 1; ++v1) {
        for (std::uint_fast64_t v2 = v1 + 1; v2 < numero_variabili; ++v2) {
            if (elabora_coppia(v1, v2)) {
                return;
            }
        }
    }
}

//************************************
//************************************
//************************************

// Evaluates the loss for every variable ordering that starts with `lb_value`
// and ends with `up_value`.
//
// For each arrangement of the remaining variables the function builds:
//   - the direct ordering  : lb_value, inner..., up_value
//   - the mirrored ordering: up_value, reversed inner..., lb_value
// It then derives the two arrays `le` / `le_inv` through
// DimensionalityReductionBuildLE, pushes them temporarily at the front of
// `rows_function` / `cols_function`, evaluates `lfmrp` on `mrp_rf`, stores
// (direct ordering, loss) in `result`, and finally pops both lists so they are
// left as they were received.
//
// `numero_le_elaborate` is not used by this function.
void DimensionalityReductionElaboraGruppoPermutazioni(std::uint_fast64_t lb_value,
                                                      std::uint_fast64_t up_value,
                                                      std::uint_fast64_t numero_variabili,
                                                      std::vector<std::uint_fast64_t>& elements_used,
                                                      LossFunctionMRPV2& lfmrp,
                                                      Matrice<double>& mrp_rf,
                                                      std::list<std::shared_ptr<std::vector<std::uint_fast64_t>>>& rows_function,
                                                      std::list<std::shared_ptr<std::vector<std::uint_fast64_t>>>& cols_function,
                                                      std::uint_fast64_t& numero_le_elaborate,
                                                      std::shared_ptr<DimensionalityReductionResult> result) {
    using Indici = std::vector<std::uint_fast64_t>;

    Indici diretta(numero_variabili);
    Indici inversa(numero_variabili);

    // Every variable except the two fixed endpoints, in ascending order so that
    // std::next_permutation enumerates all arrangements.
    Indici interni(numero_variabili - 2);
    {
        std::uint_fast64_t libero = 0;
        for (std::uint_fast64_t variabile = 0; variabile < numero_variabili; ++variabile) {
            if (variabile == lb_value || variabile == up_value) {
                continue;
            }
            interni[libero++] = variabile;
        }
    }

    const std::uint_fast64_t ultimo = numero_variabili - 1;
    do {
        // Direct ordering: lower endpoint, inner variables, upper endpoint.
        diretta[0] = lb_value;
        std::copy(interni.begin(), interni.end(), diretta.begin() + 1);
        diretta[ultimo] = up_value;

        // Mirrored ordering: upper endpoint, inner variables reversed, lower endpoint.
        inversa[0] = up_value;
        std::reverse_copy(interni.begin(), interni.end(), inversa.begin() + 1);
        inversa[ultimo] = lb_value;

        auto le = std::make_shared<Indici>();
        auto le_inv = std::make_shared<Indici>();
        DimensionalityReductionBuildLE(numero_variabili, diretta, inversa, elements_used, *le, *le_inv);

        // The candidate pair is visible to the loss function only for this evaluation.
        rows_function.push_front(le);
        cols_function.push_front(le_inv);

        double loss_value = lfmrp(mrp_rf, rows_function, cols_function);
        result->AddLE(diretta, loss_value);

        rows_function.pop_front();
        cols_function.pop_front();
    } while (std::next_permutation(interni.begin(), interni.end()));
}


//************************************
//************************************
//************************************

void DimensionalityReductionBuildMRPIntersection(std::vector<std::uint_fast64_t>& permutazione,
                                                 std::vector<std::uint_fast64_t>& le,
                                                 std::vector<std::uint_fast64_t>& le_inv,
                                                 std::vector<std::uint_fast64_t>& elements_used,
                                                 Matrice<double>& mrp_le_intersection) {
    for (std::uint_fast64_t re = 0; re < elements_used.size(); ++re) {
        auto row = elements_used.at(re);
        mrp_le_intersection(row, row) = 1.0;
        for (std::uint_fast64_t ce = re + 1; ce < elements_used.size(); ++ce) {
            auto col = elements_used.at(ce);
            std::uint_fast64_t row_val_in_le = le.at(row); // std::uint_fast64_t row_val_in_le = le.getVal(row);
            std::uint_fast64_t col_val_in_le = le.at(col); // std::uint_fast64_t col_val_in_le = le.getVal(col);
            std::uint_fast64_t row_val_in_le_inv = le_inv.at(row); // std::uint_fast64_t row_val_in_le_inv = le_inv.getVal(row);
            std::uint_fast64_t col_val_in_le_inv = le_inv.at(col); // std::uint_fast64_t col_val_in_le_inv = le_inv.getVal(col);
            if (row_val_in_le < col_val_in_le && row_val_in_le_inv < col_val_in_le_inv) {
                mrp_le_intersection(row, col) = 1.0;
                mrp_le_intersection(col, row) = 0.0;
            } else if (col_val_in_le < row_val_in_le && col_val_in_le_inv < row_val_in_le_inv) {
                mrp_le_intersection(row, col) = 0.0;
                mrp_le_intersection(col, row) = 1.0;
            } else {
                auto perm_row = permuta(row, permutazione);
                auto perm_col = permuta(col, permutazione);
                auto v = msi(perm_row, perm_col, permutazione.size(), npup_int, npdown_int);
                mrp_le_intersection(row, col) = v;
                mrp_le_intersection(col, row) = 1.0 - v;
            }
        }
    }
}

//************************************
//************************************
//************************************

// Builds the two position -> element arrays induced by a pair of variable
// orderings.
//
// Both `le` and `le_inv` are resized to 2^numero_variabili entries and filled
// with the maximum uint_fast64_t value, which marks an unused slot. Then, for
// every element n of `elements_used`, bit i of n (counted from the most
// significant of the numero_variabili bits) is moved to bit `permutazione[i]`
// (same counting) to obtain a position; `le[position]` is set to n. The same is
// done with `permutazione_inv` for `le_inv`.
//
// Each entry of the two permutations must be smaller than numero_variabili.
void DimensionalityReductionBuildLE(std::uint_fast64_t numero_variabili,
                                    std::vector<std::uint_fast64_t>& permutazione,
                                    std::vector<std::uint_fast64_t>& permutazione_inv,
                                    std::vector<std::uint_fast64_t>& elements_used,
                                    std::vector<std::uint_fast64_t>& le,
                                    std::vector<std::uint_fast64_t>& le_inv) {
    using Valore = std::uint_fast64_t;

    const Valore uno = 1;
    const Valore slot_vuoto = std::numeric_limits<Valore>::max();
    const Valore numero_profili = uno << numero_variabili;

    le.assign(numero_profili, slot_vuoto);
    le_inv.assign(numero_profili, slot_vuoto);

    for (const Valore profilo : elements_used) {
        Valore posizione = 0;
        Valore posizione_inv = 0;

        for (Valore i = 0; i < numero_variabili; ++i) {
            // Bit i of the profile, counting from the most significant one.
            const Valore bit = (profilo >> (numero_variabili - i - 1)) & uno;

            posizione |= bit << (numero_variabili - permutazione[i] - 1);
            posizione_inv |= bit << (numero_variabili - permutazione_inv[i] - 1);
        }

        le[posizione] = profilo;
        le_inv[posizione_inv] = profilo;
    }
}

// ***********************************************
// ***********************************************
// ***********************************************

// Computes 1 - numerator / denominator, where
//
//   denominator = sum over ordered pairs (r, c), r != c, of the keys of
//                 `weights_out` of  w_out(r) * mrp_out(r, c) * w_out(c)
//
//   numerator   = sum over ordered pairs (r, c), r != c, of the keys of
//                 `weights_lvl` of  | n1 - n2 |  with
//                   n1 = w_lvl(r) * mrp_lvl(r, c) * w_lvl(c)
//                   n2 = sum over v1 in elements_classes_lvl[r],
//                                 v2 in elements_classes_lvl[c], v1 != v2, of
//                        w_out(v1) * mrp_out(v1, v2) * w_out(v2)
//
// `elements_conversion_lvl` and `livello` are not used by this function.
// std::map::at throws std::out_of_range when a key of `weights_lvl` is missing
// from `elements_classes_lvl`, or a class member is missing from `weights_out`.
// No check is made for a zero denominator; in that case the result is not finite.
double nimps(Matrice<double>& mrp_lvl,
             std::map<std::uint_fast64_t, double>& weights_lvl,
             std::map<std::uint_fast64_t, std::uint_fast64_t>& elements_conversion_lvl,
             std::map<std::uint_fast64_t, std::set<std::uint_fast64_t>>& elements_classes_lvl,
             Matrice<double>& mrp_out,
             std::map<std::uint_fast64_t, double>& weights_out,
             std::uint_fast64_t livello) {
    double denominatore = 0.0;
    for (auto const& [riga, peso_r] : weights_out) {
        for (auto const& [colonna, peso_c] : weights_out) {
            if (riga != colonna) {
                auto v_out = mrp_out.at(riga, colonna);
                denominatore += peso_r * v_out * peso_c;
            }
        }
    }

    double numeratore = 0.0;
    for (auto const& [riga, peso_r] : weights_lvl) {
        for (auto const& [colonna, peso_c] : weights_lvl) {
            if (riga == colonna) {
                continue;
            }
            auto v_lvl = mrp_lvl.at(riga, colonna);
            auto n1 = peso_r * v_lvl * peso_c;

            // Bound by reference: the classes are only read, copying a std::set
            // for every pair would allocate one node per member.
            auto const& equiv_riga = elements_classes_lvl.at(riga);
            auto const& equiv_colonna = elements_classes_lvl.at(colonna);

            double n2 = 0.0;
            for (auto const& v1 : equiv_riga) {
                for (auto const& v2 : equiv_colonna) {
                    if (v1 != v2) {
                        auto v_out = mrp_out.at(v1, v2);
                        auto peso_v1 = weights_out.at(v1);
                        auto peso_v2 = weights_out.at(v2);
                        n2 += peso_v1 * v_out * peso_v2;
                    }
                }
            }

            numeratore += std::abs(n1 - n2);
        }
    }

    return 1.0 - numeratore / denominatore;
}

// ***********************************************
// ***********************************************
// ***********************************************

// Computes 1 - numerator / denominator over all ordered pairs (r, c) of keys of
// `weights_out` (pairs with r == c included), where
//
//   v_out = mrp_out(r, c)              if r != c, otherwise 0
//   v_lvl = mrp_lvl(conv(r), conv(c))  if conv(r) != conv(c), otherwise 0
//           with conv = elements_conversion_lvl
//
//   numerator   = sum of w_out(r) * w_out(c) * | v_lvl - v_out |
//   denominator = sum of w_out(r) * w_out(c) * v_out
//
// `weights_lvl`, `elements_classes_lvl` and `livello` are not used by this
// function. std::map::at throws std::out_of_range when a key of `weights_out`
// is missing from `elements_conversion_lvl`.
// No check is made for a zero denominator; in that case the result is not finite.
double nimps2(Matrice<double>& mrp_lvl,
             std::map<std::uint_fast64_t, double>& weights_lvl,
             std::map<std::uint_fast64_t, std::uint_fast64_t>& elements_conversion_lvl,
             std::map<std::uint_fast64_t, std::set<std::uint_fast64_t>>& elements_classes_lvl,
              Matrice<double>& mrp_out,
             std::map<std::uint_fast64_t, double>& weights_out,
             std::uint_fast64_t livello) {
    
    double denominatore = 0.0;
    double numeratore = 0.0;

    for (auto const& [riga, peso_r] : weights_out) {
        // The converted row does not depend on the column: look it up once per row.
        auto riga_lvl = elements_conversion_lvl.at(riga);

        for (auto const& [colonna, peso_c] : weights_out) {
            double v_out = 0.0;
            if (riga != colonna) {
                v_out = mrp_out.at(riga, colonna);
            }

            auto colonna_lvl = elements_conversion_lvl.at(colonna);
            double v_lvl = 0.0;
            if (riga_lvl != colonna_lvl) {
                v_lvl = mrp_lvl.at(riga_lvl, colonna_lvl);
            }

            numeratore += peso_r * peso_c * std::abs(v_lvl - v_out);
            denominatore += peso_r * peso_c * v_out;
        }
    }

    return 1.0 - numeratore / denominatore;
}

// ***********************************************
// ***********************************************
// ***********************************************

// Reads a delimited text file of profiles and accumulates their weights.
//
// The first line is a header; the number of variables is its column count
// minus one (the last column holds the weight). In every following row the
// last token is parsed as the weight (std::stod) and the remaining tokens are
// parsed as integers (std::stol) and packed into a single key: the token just
// before the weight lands in bit 0, the one before it in bit 1, and so on.
// The weight is added to `weights[key]`, so repeated profiles are summed.
//
// Blank rows are ignored.
//
// @param file_name  path of the file to read
// @param weights    map updated in place (existing entries are kept)
// @param DELIMETER  column separator
// @return           the number of variables deduced from the header
// Raises through throw_line when the file cannot be opened; std::stod /
// std::stol throw on tokens that are not numbers.
std::uint_fast64_t LoadPosetDataFromFile(std::string file_name, 
                                         std::map<std::uint_fast64_t, double>& weights,
                                         char DELIMETER)
{
    std::ifstream fp(file_name);
    if (!fp.good()) {
        std::string err_str = "File not found: " + std::string(file_name);
        throw_line(err_str);
    }
    
    std::string line;
    
    // Header row: the weight is the last column.
    getline(fp, line);
    Trim(line);
    
    std::uint_fast64_t numero_variabili = split(line, DELIMETER).size() - 1;
    while (getline(fp, line)) {
        Trim(line);
        // A blank row has no weight token: skip it instead of failing in stod/at.
        if (line.empty()) continue;
        // NOTE(review): rows whose trimmed length in characters equals
        // numero_variabili + 1 are skipped; the intent is not visible here - confirm.
        if (line.length() == (numero_variabili + 1)) continue;
        auto tokens = split(line, DELIMETER);
        std::uint_fast64_t p = 0;
        double peso = std::stod(tokens.at(tokens.size() - 1));
        for (std::uint_fast64_t e = 0; e < tokens.size() - 1; ++e) {
            // Walk the value columns right to left: column size-2 is bit 0.
            std::uint_fast64_t v = std::stol(tokens.at(tokens.size() - e - 2));
            p += v << e;
        }
        weights[p] += peso;
    }
    return numero_variabili;
}


// ***********************************************
// ***********************************************
// ***********************************************

// Reads one integer per line from `file_name` and stores the values, in file
// order, into `permutazione` starting at index 0.
//
// `permutazione` is not resized: it must already be large enough for every
// value in the file. Entries beyond the number of values read are left
// untouched. Blank lines are ignored.
//
// Raises through throw_line when the file cannot be opened or when it holds
// more values than `permutazione` has room for; std::stol throws on a line
// that is not a number.
void LoadOrdineVariabiliFromFile(std::string file_name, std::vector<std::uint_fast64_t>& permutazione)
{
    std::ifstream fp(file_name);
    if (!fp.good()) {
        std::string err_str = "File not found: " + std::string(file_name);
        throw_line(err_str);
    }
    
    std::string line;
    std::uint_fast64_t posizione = 0;
    while (getline(fp, line)) {
        Trim(line);
        if (line.empty()) continue;
        // Writing past the end of the vector would be undefined behaviour.
        if (posizione >= permutazione.size()) {
            std::string err_str = "Too many values in file: " + std::string(file_name);
            throw_line(err_str);
        }
        std::uint_fast64_t p = std::stol(line);
        permutazione[posizione++] = p;
    }
}


// ***********************************************
// ***********************************************
// ***********************************************

// Appends to `z_file_path` a "; "-separated square table over `elements_used`.
//
// The first line is an empty corner cell followed by the bit-string label of
// every element. Each following line starts with the label of element e1 and
// holds, for every element e2, "1" when (e1 | e2) == e2 (every bit set in e1 is
// also set in e2) and "0" otherwise.
//
// The file is opened in append mode. The open result is not checked: if the
// file cannot be opened nothing is written and no error is reported.
void SaveZInput(std::string z_file_path, 
                std::vector<std::uint_fast64_t>& elements_used,
                std::uint_fast64_t numero_variabili) {
    std::fstream z_file;
    z_file.open(z_file_path, std::ios::app);

    const std::uint_fast64_t quanti = elements_used.size();
    // Text that follows the cell in column `pos`: a separator, or the line end
    // after the last column.
    auto dopo_cella = [quanti](std::uint_fast64_t pos) -> const char* {
        return (pos + 1 < quanti) ? "; " : "\n";
    };

    // Header line.
    z_file << "; ";
    for (std::uint_fast64_t pe = 0; pe < quanti; ++pe) {
        z_file << to_bitstring(elements_used[pe], numero_variabili) << dopo_cella(pe);
    }

    // One line per element.
    for (std::uint_fast64_t pe1 = 0; pe1 < quanti; ++pe1) {
        const std::uint_fast64_t e1 = elements_used[pe1];
        z_file << to_bitstring(e1, numero_variabili) << "; ";
        for (std::uint_fast64_t pe2 = 0; pe2 < quanti; ++pe2) {
            const std::uint_fast64_t e2 = elements_used[pe2];
            const bool contenuto = ((e1 | e2) == e2);
            z_file << (contenuto ? "1" : "0") << dopo_cella(pe2);
        }
    }
    z_file.close();
}

// ***********************************************
// ***********************************************
// ***********************************************

// Appends to `zoutput_file_path` a "; "-separated square table over
// `elements_used`, derived from `best_le` and `best_le_inv`.
//
// The first line is an empty corner cell followed by the bit-string label of
// every element. The line of elements_used[0] is written as all "1" without
// consulting the two arrays (NOTE(review): presumably the first element is the
// minimum of the poset - confirm). For every other element e1 the cell of e2 is
// "1" when e1 == e2, or when e1 has a smaller value than e2 in both `best_le`
// and `best_le_inv`; otherwise it is "0".
//
// `best_le` / `best_le_inv` are indexed by element value with at(), which
// throws std::out_of_range for an element outside the arrays.
// When `elements_used` is empty only the corner prefix "; " is written.
// The file is opened in append mode and the open result is not checked.
void SaveZOutput(std::string zoutput_file_path, 
                 std::vector<std::uint_fast64_t>& best_le,
                 std::vector<std::uint_fast64_t>& best_le_inv,
                 std::vector<std::uint_fast64_t>& elements_used,
                 std::uint_fast64_t numero_variabili) {
    std::fstream zoutput_file;
    zoutput_file.open(zoutput_file_path, std::ios::app);
    zoutput_file << "; ";

    const std::uint_fast64_t quanti = elements_used.size();
    if (quanti == 0) {
        // Nothing else to write; elements_used[0] below must not be read.
        zoutput_file.close();
        return;
    }

    // Text that follows the cell in column `pos`: a separator, or the line end
    // after the last column.
    auto dopo_cella = [quanti](std::uint_fast64_t pos) -> const char* {
        return (pos + 1 < quanti) ? "; " : "\n";
    };

    // Header line.
    for (std::uint_fast64_t pe = 0; pe < quanti; ++pe) {
        zoutput_file << to_bitstring(elements_used[pe], numero_variabili) << dopo_cella(pe);
    }

    // Line of the first element: every cell is 1.
    zoutput_file << to_bitstring(elements_used[0], numero_variabili) << "; ";
    for (std::uint_fast64_t pe = 0; pe < quanti; ++pe) {
        zoutput_file << "1" << dopo_cella(pe);
    }

    // Remaining lines.
    for (std::uint_fast64_t pe1 = 1; pe1 < quanti; ++pe1) {
        const std::uint_fast64_t e1 = elements_used[pe1];
        zoutput_file << to_bitstring(e1, numero_variabili) << "; ";
        for (std::uint_fast64_t pe2 = 0; pe2 < quanti; ++pe2) {
            const std::uint_fast64_t e2 = elements_used[pe2];
            bool sotto = true;
            if (e1 != e2) {
                const std::uint_fast64_t row_val_in_le = best_le.at(e1);
                const std::uint_fast64_t col_val_in_le = best_le.at(e2);
                const std::uint_fast64_t row_val_in_le_inv = best_le_inv.at(e1);
                const std::uint_fast64_t col_val_in_le_inv = best_le_inv.at(e2);
                sotto = row_val_in_le < col_val_in_le && row_val_in_le_inv < col_val_in_le_inv;
            }
            zoutput_file << (sotto ? "1" : "0") << dopo_cella(pe2);
        }
    }
    zoutput_file.close();
}


