import React, { FC, useState, useContext, useEffect } from 'react';
import axios from 'axios';
import FuserLoader from '../../containers/FuserPage/FuserLoader';
import { MAX_PREVIEW_CHARS } from '../../constants/blocks';
import BlockProps from '../../models/BlockProps';
import Block from '../../models/Block';
import FuserContext from '../../context/FuserContext';
import useBlockRunner from '../../hooks/useBlockRunner';
import AutocompleteTextarea from '../../containers/FuserPage/AutocompleteTextarea';
import { useAuthHeader } from 'react-auth-kit';
import { backendURL } from '../../constants/environmental';
import { testButtonStyles } from '../../constants/styles';
import { unflatten, updateAtIndex } from '../../utils/array';
import { encodeHtmlEntities, truncateAfter } from '../../utils/string';

// const cheerio = require('cheerio');

// scrape() is no longer used; the old implementation is kept below (commented out) for reference
/*
async function scrape(url: string) {
  var output = '';
  let html;

  // encode URL so that forward slashes are not interpreted as request parameters
  const encodedURL = encodeURIComponent(url);

  try {
    const response = await fetch(
      `${backendURL}/fetch-data/${encodedURL}`
      //`https://api.skillfusion.ai/fetch-data/${encodedURL}`
    );
    console.log(response);
    // const body = await response.body;
    // console.log(body);
    const jsonResponse = await response.json();
    if (response.ok) {
      html = jsonResponse;
    } else {
      const error = jsonResponse?.error;
      throw new Error(`${error ?? jsonResponse}`);
    }
  } catch (error: any) {
    throw new Error(error?.message ?? error?.error);
  }

  const match = html?.match(/<body[^>]*>([\s\S]*)<\/body>/);

  let content = match?.[1];

  if (typeof content !== 'string') return 'Invalid HTML content';

  content = content
    .replace(/<!--.*?-->/gs, '') // remove html comments
    .replace(
      /<(script|style|header|head|iframe|button|footer|svg|picture|nav|figure|aside|form|figcaption|blockquote|video|audio|canvas|comment|noscript|textarea|gu)( [^>]*?)?>.*?<\/\1>/gs,
      ' '
    ) // remove irrelevant tags
    .replace(/<(img|meta|link|br|hr|input|base|embed|param)( [^>]*?)?>/g, '') // remove self closing tags
    .replace(/(<table[^>]*?>.*?<\/table>)/gms, convertHTMLtableToText);

  const noInnerHTMLRegex = /<([^\s>]*?)[^>]*?>\s*<\/\1>/g;

  let startTimestamp = Date.now();

  while (noInnerHTMLRegex.test(content)) {
    content = content.replace(noInnerHTMLRegex, ''); // remove tags with no inner html
    if (Date.now() - startTimestamp > 5000) {
      console.log('removing empty html tags timed out');
      break;
    }
  }

  const inlineTextRegex =
    /<p.*?<(a|b|em|span|strong|i|code|sup|sub|cite|q|abbr|time|var|del|ins|mark|small|u|s|bdi)(?:[^>]*?)>([^<]*?)<\/\1>/gs;

  while (inlineTextRegex.test(content)) {
    content = content.replace(inlineTextRegex, '$2'); // turn inline text part of paragraphs into plain text
    if (Date.now() - startTimestamp > 5000) {
      console.log('converting inline tags to plain text timed out');
      break;
    }
  }

  content = content
    .replace(/<(a|span)( [^>]*?)?>.*?<\/\1>/gs, '') // remove all remaining a/span tags
    .replace(/(<(?:h[12345]|p|br|div|li)( [^>]*?)?>)/g, '\n\n') // add new line characters for readability
    .replace(/<[^>]+?>/g, ' ') // remove any remaining html tags
    .replace(/\n\s*\n/g, '\n') // combine multiple new lines in to single new line
    .replace(/:/g, '-');

  output = content.trim();

  // Grace period to not overload in case scraping from the same site
  //await pause(1000);

  return output;
}

*/

// parseHTMLOld was moved to the backend; the old client-side version is kept below (commented out) for reference
// const parseHTMLOld = (htmlBody: any) => {
//   if (typeof htmlBody !== 'string') return 'Invalid HTML content';

//   htmlBody = htmlBody
//     .replace(/<!--.*?-->/gs, '') // remove html comments
//     .replace(
//       /<(script|style|header|head|iframe|button|footer|svg|picture|nav|figure|aside|form|figcaption|blockquote|video|audio|canvas|comment|noscript|textarea|gu)[^>]*?>.*?<\/\1>/gs,
//       ' '
//     ) // remove irrelevant tags
//     .replace(/<(img|meta|link|br|hr|input|base|embed|param)[^>]*>/g, '') // remove self closing tags
//     .replace(/(<table[^>]*?>.*?<\/table>)/gms, convertHTMLtableToText);
//   const noInnerHTMLRegex = /<([^\s>]*?)[^>]*?>\s*<\/\1>/g;
//   while (noInnerHTMLRegex.test(htmlBody))
//     htmlBody = htmlBody.replace(noInnerHTMLRegex, ''); // remove tags with no inner html

//   const inlineTextRegex =
//     /<p.*?<(a|b|em|span|strong|i|code|sup|sub|cite|q|abbr|time|var|del|ins|mark|small|u|s|bdi)( [^>]*?)?>(.*?)<\/\1>/gs;
//   while (inlineTextRegex.test(htmlBody))
//     htmlBody = htmlBody.replace(inlineTextRegex, '$3'); // turn inline text part of paragraphs into plain text

//   htmlBody = htmlBody
//     .replace(/<(a|span)( [^>]*?)?>.*?<\/\1>/gs, '') // remove all remaining a/span tags
//     .replace(/(<(?:h[12345]|p|br|div|li)( [^>]*?)?>)/g, '\n\n') // add new line characters for readability
//     .replace(/<[^>]+?>/g, ' ') // remove any remaining html tags
//     .replace(/\n\s*\n/g, '\n') // combine multiple new lines in to single new line
//     .replace(/:/g, '-');

//   return htmlBody.trim();
// };

// const parseHTML = (html: any) => {
//   const $ = cheerio.load(html);

//   // Remove irrelevant tags
//   $(
//     'script, style, header, head, iframe, button, footer, svg, picture, nav, figure, aside, form, figcaption, blockquote, video, audio, canvas, noscript, textarea, img, meta, link, br, hr, input, base, embed, param, comment'
//   ).remove();

//   // Remove tags with no inner HTML
//   $(':empty').remove();

//   // Iterate over each tag and convert tables to text if any
//   $('table').each((i: any, el: any) => {
//     let tableText = '';
//     $(el)
//       .find('tr')
//       .each((i: any, tr: any) => {
//         $(tr)
//           .find('th, td')
//           .each((i: any, cell: any) => {
//             tableText += $(cell).text() + '\t';
//           });
//         tableText += '\n';
//       });
//     $(el).replaceWith(tableText);
//   });

//   // Extract the main content text
//   const mainContentText = $('body').text();

//   // combine multiple newlines into one newline
//   return mainContentText.trim().replace(/\n\s*\n/g, '\n');
// };

/**
 * Editor / runner UI for a "scraper" block.
 *
 * In edit mode it renders a textarea for the URLs (one per line) plus the
 * scrape options, and a "Scrape" button. Running the block either:
 *  - POSTs the URLs and options to `${backendURL}/fusion/scraper/`, or
 *  - when `block.data.getYoutubeTranscript` is set, GETs
 *    `${backendURL}/youtube-transcript` for the first URL only.
 *
 * The result is written to `block.data.response` through `setBlocks`, with
 * `updatedBlock: true` set on the block.
 *
 * In runner mode only the result (or an error message) is shown, and when
 * `collapsed` is set a truncated preview of the response is rendered instead.
 */
const ScraperBlock: FC<BlockProps> = ({
  isLoading,
  setIsLoading,
  index,
  resultHtml,
  block,
  handleChange,
  collapsed,
}) => {
  const {
    blockStyles,
    replacePlaceholders,
    runnerMode,
    blocks,
    setBlocks,
    setStillRunning,
  } = useContext(FuserContext);

  // Pushes the (mutated) block object back into the shared blocks array.
  const updateBlocks = () =>
    setBlocks((blocks: Block[]) => updateAtIndex(index, block, blocks));

  // On mount: default `ignoreFailedScrapes` to false and sync the block.
  useEffect(() => {
    if (block.data.ignoreFailedScrapes === undefined) {
      block.data.ignoreFailedScrapes = false;
    }
    updateBlocks();
  }, []);

  const authHeader = useAuthHeader()();

  // Option state is seeded from the saved block data; "remove HTML" defaults
  // to on and "pause between scrapes" defaults to off.
  const [removeHTML, setRemoveHTML] = useState(
    block.data.removeHTMLOption ?? true
  );
  const [pauseBetween, setPauseBetween] = useState(
    block.data.pauseBetweenOption ?? false
  );
  // Crop length in words; a missing or zero saved value falls back to 2000.
  const [cropCount, setCropCount] = useState(block.data.cropWordsCount || 2000);

  // Declared before `onTestClick` so the handler never references a binding
  // that has not been initialised yet.
  const [errorMessage, setErrorMessage] = useState<string>('');

  const {
    response,
    inputToProcess,
    ignoreFailedScrapes,
    getYoutubeTranscript,
  } = block.data;

  // Ids must be unique per block so each <label htmlFor> targets its own input.
  const removeHTMLId = `removeHTMLOption-${index}`;
  const pauseBetweenId = `pauseBetweenOption-${index}`;
  const ignoreFailedScrapesId = `ignoreFailedScrapesOption-${index}`;
  const getYoutubeTranscriptId = `getYoutubeTranscriptOption-${index}`;

  /**
   * Runs the block: resolves placeholders in the input, calls the backend and
   * stores the result on the block. Errors are reported through
   * `errorMessage`; the loading flag is always cleared when the run ends.
   */
  const onTestClick = async () => {
    setErrorMessage('');
    setIsLoading(true);

    const rawInput = block.data.inputToProcess;

    let processedInput: string[] | string = rawInput
      ? replacePlaceholders(rawInput, blocks)
      : '';

    // Remember the original shape: only non-string (already list-like) input
    // is re-nested with `unflatten` after the request.
    const inputType = typeof processedInput;

    if (typeof processedInput === 'string') {
      processedInput = processedInput.split('\n');
    }

    if (!Array.isArray(processedInput)) processedInput = [processedInput];

    // TODO: break this down to scraping a few at a time, and check callers
    // aren't scraping anything shady or using this to post spam from our
    // domain using url variables.

    try {
      let blockResponse: any;

      if (block.data.getYoutubeTranscript) {
        // Encode the video URL so characters such as `&` or `#` inside it are
        // not parsed as part of our own query string.
        const videoURL = encodeURIComponent(String(processedInput[0]));
        const transcriptFetchResponse = await axios.get(
          `${backendURL}/youtube-transcript?videoURL=${videoURL}`
        );
        blockResponse = transcriptFetchResponse.data;
      } else {
        const scrapeResponse = await axios.post(
          `${backendURL}/fusion/scraper/`,
          {
            urls: processedInput.flat(Infinity),
            delay: pauseBetween,
            removeHTML,
            cropCount,
            ignoreFailedScrapes,
          },
          {
            headers: {
              Authorization: authHeader,
            },
          }
        );

        blockResponse = scrapeResponse.data.scrapedContent;
      }

      if (inputType !== 'string') {
        blockResponse = unflatten(blockResponse, processedInput);
      }

      setBlocks((blocks: Block[]) => {
        const newBlocks = [...blocks];

        newBlocks[index] = {
          ...newBlocks[index],
          updatedBlock: true,
          data: {
            ...newBlocks[index].data,
            type: 'scraper',
            response: blockResponse,
          },
        };
        return newBlocks;
      });
    } catch (error) {
      // Not every failure carries an HTTP response (e.g. network errors), so
      // the status has to be read defensively.
      const status = (
        error as { response?: { status?: number } } | null | undefined
      )?.response?.status;

      if (block.data.getYoutubeTranscript && status === 400) {
        setErrorMessage(
          'Sorry, there was an error fetching the transcript. Some videos may not have one.'
        );
        setStillRunning(false);
      } else {
        setErrorMessage('Error scraping URL(s)');
      }
      console.error(error);
    } finally {
      // Always clear the loader, even if the error handling above throws.
      setIsLoading(false);
    }
  };

  useBlockRunner(onTestClick, index);

  // Clear any stale error when switching between edit and runner mode.
  useEffect(() => {
    setErrorMessage('');
  }, [runnerMode]);

  if (collapsed) {
    if (response?.length > 0)
      return (
        <>
          <div>Scraped Content:</div>
          <div>{truncateAfter(MAX_PREVIEW_CHARS, response.toString())}</div>
        </>
      );
    else return <div>Scraper Block</div>;
  }

  return (
    <FuserLoader
      name='Scraper Block'
      loading={isLoading}
    >
      {runnerMode ? (
        response && response?.length > 0 ? (
          <>
            <p>Scraped Content:</p>
            <p>{errorMessage}</p>
            <div
              className='overflow-x-auto'
              dangerouslySetInnerHTML={{
                __html: encodeHtmlEntities(resultHtml),
              }}
            />
          </>
        ) : (
          <>
            <div>Scraper block</div>
            <p>{errorMessage}</p>
          </>
        )
      ) : (
        <div
          className={blockStyles}
          key={index}
        >
          <label
            className='text-xs'
            id='prompt-textarea'
          >
            URLs to scrape (one per line):
          </label>

          <AutocompleteTextarea
            block={block}
            index={index}
            onChange={handleChange}
            textAreaIndex={0}
            className='w-full prompt-textarea bg-transparent rounded-xl text-sm border border-neutral-100 shadow-inner '
            name='inputToProcess'
            value={inputToProcess || ''}
          />

          <div className='flex flex-col gap-4'>
            <div className='flex gap-2 items-center'>
              <input
                type='checkbox'
                id={removeHTMLId}
                name='removeHTMLOption'
                checked={removeHTML}
                onChange={() => {
                  block.data.removeHTMLOption = !removeHTML;
                  setRemoveHTML(!removeHTML);
                }}
              />
              <label htmlFor={removeHTMLId}>remove HTML & Code </label>
            </div>

            <div className='flex gap-2 items-center'>
              <input
                type='checkbox'
                id={pauseBetweenId}
                name='pauseBetweenOption'
                checked={pauseBetween}
                onChange={() => {
                  block.data.pauseBetweenOption = !pauseBetween;
                  setPauseBetween(!pauseBetween);
                }}
              />
              <label htmlFor={pauseBetweenId}>
                pause between scrapes (if scraping multiple pages from the same
                site)
              </label>
            </div>

            <div className='flex gap-2 items-center'>
              <input
                type='checkbox'
                id={ignoreFailedScrapesId}
                name='ignoreFailedScrapesOption'
                // Coerced so the checkbox is controlled from the first render,
                // before the mount effect has defaulted the value.
                checked={Boolean(ignoreFailedScrapes)}
                onChange={e => {
                  block.data.ignoreFailedScrapes = e.target.checked;
                  updateBlocks();
                }}
              />
              <label htmlFor={ignoreFailedScrapesId}>
                Ignore failed scrapes
              </label>
            </div>

            <div className='flex gap-2 items-center'>
              <input
                type='checkbox'
                id={getYoutubeTranscriptId}
                name='getYoutubeTranscriptOption'
                // Coerced for the same reason: the saved value may be undefined.
                checked={Boolean(getYoutubeTranscript)}
                onChange={e => {
                  block.data.getYoutubeTranscript = e.target.checked;
                  updateBlocks();
                }}
              />
              <label htmlFor={getYoutubeTranscriptId}>
                Get transcript (for Youtube video URLs)
              </label>
            </div>

            <span>
              Crop scraped content at &nbsp;
              <input
                type='textbox'
                name='cropWordsCount'
                className='w-20'
                defaultValue={cropCount}
                onChange={e => {
                  const words = Number(e.target.value);
                  // Ignore non-numeric input rather than storing NaN, which
                  // would render as "NaN characters" and be sent as null.
                  if (!Number.isFinite(words)) return;
                  block.data.cropWordsCount = words;
                  setCropCount(words);
                }}
              />{' '}
              words ({cropCount * 5} characters)
            </span>
          </div>

          <button
            onClick={onTestClick}
            className={testButtonStyles}
          >
            Scrape
          </button>

          {errorMessage !== '' && <p>{errorMessage}</p>}

          <div
            className='overflow-x-auto'
            dangerouslySetInnerHTML={{ __html: encodeHtmlEntities(resultHtml) }}
          />
        </div>
      )}
    </FuserLoader>
  );
};

export default ScraperBlock;
