package ca.pfv.spmf.algorithms.clustering.text_clusterer;

import ca.pfv.spmf.tools.MemoryLogger;
import ca.pfv.spmf.tools.textprocessing.PorterStemmer;
import ca.pfv.spmf.tools.textprocessing.StopWordAnalyzer;
import java.io.BufferedReader;
import java.io.BufferedWriter;
import java.io.File;
import java.io.FileReader;
import java.io.FileWriter;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.HashSet;
import java.util.Iterator;
import java.util.Locale;

/* loaded from: input_file:ca/pfv/spmf/algorithms/clustering/text_clusterer/TextClusterAlgo.class */
/**
 * Clusters text records by pairing every record with its most similar neighbour.
 *
 * <p>Input: one record per line, formatted as {@code <integer id><TAB><text>}.
 * Output: a header line {@code RecordId<TAB>Clusternum} followed by one line per cluster member.
 *
 * <p>Processing steps of {@link #runAlgorithm(String, String)}:
 * <ol>
 *   <li>load and normalise the records (lower-casing, optional stop-word removal and stemming);</li>
 *   <li>turn every record into a weighted term vector over the vocabulary of the run;</li>
 *   <li>compute the pairwise dot products of those vectors;</li>
 *   <li>emit, for every record, a cluster made of the record and its best-scoring neighbour.</li>
 * </ol>
 *
 * <p>An instance keeps per-run state in its fields, so a single instance must not be used by
 * several runs at the same time.
 */
public class TextClusterAlgo {
    /** Vocabulary of the current run: every distinct non-empty token found in the input. */
    private HashSet<String> allWords = new HashSet<>();
    /** Maps a record's position in the loaded list to the record id read from the input file. */
    private HashMap<Integer, Integer> idMap = new HashMap<>();
    /** Wall-clock time (ms) at which the last run started. */
    private long startTimestamp = 0;
    /** Wall-clock time (ms) at which the last run finished, successfully or not. */
    private long endTimeStamp = 0;
    /** When true, tokens are passed through the stemmer before being indexed. */
    private boolean stemFlag;
    /** When true, stop words are removed from the text before tokenisation. */
    private boolean stopWordFlag;
    private PorterStemmer stemmer;

    /**
     * Runs the clustering with explicit preprocessing options.
     *
     * @param str  path of the input file
     * @param str2 path of the output file
     * @param z    whether tokens are stemmed
     * @param z2   whether stop words are removed
     */
    public void runAlgorithm(String str, String str2, boolean z, boolean z2) {
        this.stemFlag = z;
        this.stopWordFlag = z2;
        runAlgorithm(str, str2);
    }

    /**
     * Runs the clustering with the preprocessing options currently stored on this instance
     * (both disabled unless the four-argument overload was called before).
     *
     * <p>Failures are reported on standard output and do not propagate to the caller.
     *
     * @param str  path of the input file
     * @param str2 path of the output file
     */
    public void runAlgorithm(String str, String str2) {
        this.startTimestamp = System.currentTimeMillis();
        // Validate before touching the file system: new File(null) would throw.
        if (str == null || str2 == null) {
            System.out.println("Please pass the path of the input");
            this.endTimeStamp = System.currentTimeMillis();
            return;
        }
        this.stemmer = new PorterStemmer();
        // Drop whatever a previous run on this instance left behind.
        this.allWords.clear();
        this.idMap.clear();
        // The reader is opened first so that a missing input file does not create an output file.
        try (BufferedReader reader = new BufferedReader(new FileReader(new File(str)));
                BufferedWriter writer = new BufferedWriter(new FileWriter(new File(str2)))) {
            ArrayList<Record> records = loadInput(reader, this.stemFlag, this.stopWordFlag);
            buildTermVectors(records);
            double[][] similarity = buildSimilarityMatrix(records);
            writeClusters(writer, buildClusters(similarity));
        } catch (Exception e) {
            System.out.println("Either file didn't exist or error while clustering");
            e.printStackTrace();
        } finally {
            // Always record the end time so printStatistics never shows a negative duration.
            this.endTimeStamp = System.currentTimeMillis();
        }
    }

    /**
     * Prints the duration of the last run and the peak memory reported by {@link MemoryLogger}.
     *
     * <p>NOTE(review): this class never asks the MemoryLogger to take a measurement, so the
     * memory figure reflects whatever the logger already holds.
     */
    public void printStatistics() {
        System.out.println("========== Text Clusterer - STATS ============");
        System.out.println(" Total time ~: " + (this.endTimeStamp - this.startTimestamp) + " ms");
        System.out.println(" Max memory:" + MemoryLogger.getInstance().getMaxMemory() + " mb ");
        System.out.println("=====================================");
    }

    /** Computes the weighted term vector of every record and stores it on the record. */
    private void buildTermVectors(ArrayList<Record> records) {
        int vocabularySize = this.allWords.size();
        String[] terms = this.allWords.toArray(new String[vocabularySize]);
        // The document-frequency weight depends only on the term, so compute it once per term
        // instead of once per (record, term) pair.
        double[] weights = new double[vocabularySize];
        for (int t = 0; t < vocabularySize; t++) {
            weights[t] = FindInverseDocumentFrequency(terms[t], records);
        }
        for (Record record : records) {
            String[] tokens = record.getAttribute().split(" ");
            double[] vector = new double[vocabularySize];
            for (int t = 0; t < vocabularySize; t++) {
                vector[t] = FindTermFrequency(tokens, terms[t]) * weights[t];
            }
            record.setTfVector(vector);
        }
    }

    /** Builds the symmetric matrix of pairwise similarities between all records. */
    private double[][] buildSimilarityMatrix(ArrayList<Record> records) {
        int count = records.size();
        double[][] similarity = new double[count][count];
        for (int row = 0; row < count; row++) {
            double[] rowVector = records.get(row).getTfVector();
            // The dot product is symmetric: compute the upper triangle and mirror it.
            for (int col = row; col < count; col++) {
                double score = calculateSimilarity(rowVector, records.get(col).getTfVector());
                similarity[row][col] = score;
                similarity[col][row] = score;
            }
        }
        return similarity;
    }

    /**
     * Creates one candidate cluster per record: the record plus the other record it scores
     * highest with. A record that has no strictly positive score with any other record forms a
     * cluster on its own, so that it still appears in the output.
     *
     * <p>Candidates are collected in a set; whether equal candidates collapse depends on the
     * equals/hashCode of {@code TextCluster}, which is defined outside this class.
     */
    private HashSet<TextCluster> buildClusters(double[][] similarity) {
        HashSet<TextCluster> clusters = new HashSet<>();
        for (int position = 0; position < similarity.length; position++) {
            double bestScore = 0.0d;
            int bestPeer = -1;
            for (int other = 0; other < similarity.length; other++) {
                if (position != other && similarity[position][other] > bestScore) {
                    bestScore = similarity[position][other];
                    bestPeer = other;
                }
            }
            ArrayList<Integer> members = new ArrayList<>();
            members.add(Integer.valueOf(position));
            if (bestPeer >= 0) {
                members.add(Integer.valueOf(bestPeer));
            }
            TextCluster cluster = new TextCluster();
            cluster.setCluster(members);
            clusters.add(cluster);
        }
        return clusters;
    }

    /**
     * Writes the header and one {@code <record id><TAB><cluster number>} line per cluster member.
     * Cluster numbers follow the iteration order of the set and carry no meaning of their own.
     */
    private void writeClusters(BufferedWriter writer, HashSet<TextCluster> clusters)
            throws IOException {
        writer.write("RecordId\tClusternum\n");
        int clusterNumber = 0;
        for (TextCluster cluster : clusters) {
            for (Integer position : cluster.getCluster()) {
                writer.write(String.valueOf(this.idMap.get(position)) + "\t" + clusterNumber + "\n");
            }
            clusterNumber++;
        }
    }

    /** Returns the dot product of two vectors of equal length. */
    private double calculateSimilarity(double[] dArr, double[] dArr2) {
        double sum = 0.0d;
        for (int i = 0; i < dArr.length; i++) {
            sum += dArr[i] * dArr2[i];
        }
        return sum;
    }

    /**
     * Reads and normalises all records from the reader, filling {@code allWords} and
     * {@code idMap} as a side effect.
     *
     * <p>Blank lines and lines that are not of the form {@code <integer id><TAB><text>} are
     * reported and skipped. Any other failure is printed and ends the load, returning the
     * records read so far.
     *
     * @param bufferedReader source of the input lines
     * @param z              whether tokens are stemmed
     * @param z2             whether stop words are removed
     * @return the records in input order
     */
    private ArrayList<Record> loadInput(BufferedReader bufferedReader, boolean z, boolean z2) {
        ArrayList<Record> records = new ArrayList<>();
        int lineNumber = 0;
        try {
            String line;
            while ((line = bufferedReader.readLine()) != null) {
                lineNumber++;
                if (line.trim().isEmpty()) {
                    continue;
                }
                String[] columns = line.split("\t", -1);
                if (columns.length < 2) {
                    System.out.println("Skipping line " + lineNumber + ": expected <id><TAB><text>");
                    continue;
                }
                int recordId;
                try {
                    recordId = Integer.parseInt(columns[0].trim());
                } catch (NumberFormatException e) {
                    System.out.println("Skipping line " + lineNumber + ": record id is not an integer");
                    continue;
                }
                // Locale.ROOT keeps lower-casing independent of the machine's default locale.
                String text = columns[1].toLowerCase(Locale.ROOT).replaceAll("[^a-zA-Z0-9]+", " ");
                if (z2) {
                    // NOTE(review): a new analyzer is created per line, as before; hoisting it
                    // is only safe if StopWordAnalyzer keeps no state between calls - confirm.
                    text = new StopWordAnalyzer().removeStopWords(text);
                }
                StringBuilder normalised = new StringBuilder();
                for (String token : text.split(" ")) {
                    if (token.isEmpty()) {
                        // Leading separators produce empty tokens; they are not words.
                        continue;
                    }
                    if (z) {
                        token = this.stemmer.stem(token);
                        if (token == null || token.isEmpty()) {
                            continue;
                        }
                    }
                    normalised.append(token).append(' ');
                    this.allWords.add(token);
                }
                Record record = new Record();
                record.setRecordId(recordId);
                record.setAttribute(normalised.toString());
                // The position of the record in the list is the key used later by the clusters.
                this.idMap.put(Integer.valueOf(records.size()), Integer.valueOf(recordId));
                records.add(record);
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
        return records;
    }

    /**
     * Returns the document-frequency weight of a term: {@code log(df / (N + 1))}, where
     * {@code df} is the number of records whose normalised text contains the term and
     * {@code N} is the number of records.
     *
     * <p>The value is never positive. Because similarities multiply two such weights for the
     * same term, the sign cancels out and rarer terms still weigh more.
     *
     * <p>NOTE(review): {@code contains} is a substring test, so a short term is also counted in
     * records where it only occurs inside a longer word. Left unchanged because altering it
     * would change every similarity score.
     */
    private float FindInverseDocumentFrequency(String str, ArrayList<Record> arrayList) {
        int documentCount = 0;
        for (Record record : arrayList) {
            if (record.getAttribute().contains(str)) {
                documentCount++;
            }
        }
        return (float) Math.log(documentCount / (1.0f + arrayList.size()));
    }

    /**
     * Returns the share of tokens equal to the term (ignoring case), or 0 for a record without
     * tokens.
     */
    private double FindTermFrequency(String[] tokens, String term) {
        if (tokens.length == 0) {
            return 0.0d;
        }
        int occurrences = 0;
        for (String token : tokens) {
            if (token.equalsIgnoreCase(term)) {
                occurrences++;
            }
        }
        // Floating-point division: integer division would truncate almost every result to 0.
        return (double) occurrences / tokens.length;
    }
}
