//! DIMCAS CNF parser and writer for the Varisat SAT solver.

use std::{borrow::Borrow, io, mem::replace};

use varisat_formula::{CnfFormula, ExtendFormula, Lit, Var};

use anyhow::Error;
use thiserror::Error;

/// Possible errors while parsing a DIMACS CNF formula.
#[derive(Debug, Error)]
pub enum ParserError {
    #[error(
        "line {}: Unexpected character in DIMACS CNF input: '{}'",
        line,
        unexpected
    )]
    UnexpectedInput { line: usize, unexpected: char },
    #[error(
        "line {}: Literal index is too large: {}{}...",
        line,
        index,
        final_digit
    )]
    LiteralTooLarge {
        line: usize,
        index: usize,
        final_digit: usize,
    },
    #[error("line {}: Invalid header syntax: {}", line, header)]
    InvalidHeader { line: usize, header: String },
    #[error("line {}: Unterminated clause", line)]
    UnterminatedClause { line: usize },
    #[error(
        "Formula has {} variables while the header specifies {} variables",
        var_count,
        header_var_count
    )]
    VarCount {
        var_count: usize,
        header_var_count: usize,
    },
    #[error(
        "Formula has {} clauses while the header specifies {} clauses",
        clause_count,
        header_clause_count
    )]
    ClauseCount {
        clause_count: usize,
        header_clause_count: usize,
    },
    #[error("Parser invoked after a previous error")]
    PreviousError,
}

/// Variable and clause count present in a DIMACS CNF header.
#[derive(Copy, Clone, Debug)]
pub struct DimacsHeader {
    pub var_count: usize,
    pub clause_count: usize,
}

/// Parser for DIMACS CNF files.
///
/// This parser can consume the input in chunks while also producing the parsed result in chunks.
#[derive(Default)]
pub struct DimacsParser {
    formula: CnfFormula,
    partial_clause: Vec<Lit>,
    header: Option<DimacsHeader>,

    line_number: usize,
    clause_count: usize,
    partial_lit: usize,
    negate_next_lit: bool,

    in_lit: bool,
    in_comment_or_header: bool,
    in_header: bool,
    start_of_line: bool,
    error: bool,

    header_line: Vec<u8>,
}

impl DimacsParser {
    /// Create a new DIMACS CNF parser.
    pub fn new() -> DimacsParser {
        DimacsParser {
            formula: CnfFormula::new(),
            partial_clause: vec![],
            header: None,

            line_number: 1,
            clause_count: 0,
            partial_lit: 0,
            negate_next_lit: false,

            in_lit: false,
            in_comment_or_header: false,
            in_header: false,
            start_of_line: true,
            error: false,

            header_line: vec![],
        }
    }

    /// Parse the given input and check the header if present.
    ///
    /// This parses the whole input into a single [`CnfFormula`](varisat_formula::CnfFormula).
    /// Incremental parsing is possible using [`parse_incremental`](DimacsParser::parse_incremental)
    /// or the [`parse_chunk`](DimacsParser::parse_chunk) method.
    pub fn parse(input: impl io::Read) -> Result<CnfFormula, Error> {
        Ok(Self::parse_incremental(input, |_| Ok(()))?.take_formula())
    }

    /// Parse the given input incrementally and check the header if present.
    ///
    /// The callback is invoked repeatedly with a reference to the parser. The callback can process
    /// the formula incrementally by calling [`take_formula`](DimacsParser::take_formula) on the
    /// passed argument.
    pub fn parse_incremental(
        input: impl io::Read,
        mut callback: impl FnMut(&mut DimacsParser) -> Result<(), Error>,
    ) -> Result<DimacsParser, Error> {
        use io::BufRead;

        let mut buffer = io::BufReader::new(input);
        let mut parser = Self::new();

        loop {
            let data = buffer.fill_buf()?;
            if data.is_empty() {
                break;
            }
            parser.parse_chunk(data)?;
            let len = data.len();
            buffer.consume(len);

            callback(&mut parser)?;
        }
        parser.eof()?;
        callback(&mut parser)?;
        parser.check_header()?;

        Ok(parser)
    }

    /// Parse a chunk of input.
    ///
    /// After parsing the last chunk call the [`eof`](DimacsParser::eof) method.
    ///
    /// If this method returns an error, the parser is in an invalid state and cannot parse further
    /// chunks.
    pub fn parse_chunk(&mut self, chunk: &[u8]) -> Result<(), ParserError> {
        if self.error {
            return Err(ParserError::PreviousError);
        }
        for &byte in chunk.iter() {
            if byte == b'\n' {
                self.line_number += 1;
            }
            match byte {
                b'\n' | b'\r' if self.in_comment_or_header => {
                    if self.in_header {
                        self.in_header = false;
                        self.parse_header_line()?;
                    }
                    self.in_comment_or_header = false;
                    self.start_of_line = true
                }
                _ if self.in_comment_or_header => {
                    if self.in_header {
                        self.header_line.push(byte);
                    }
                }
                b'0'..=b'9' => {
                    self.in_lit = true;
                    let digit = (byte - b'0') as usize;

                    const CAN_OVERFLOW: usize = Var::max_count() / 10;
                    const OVERFLOW_DIGIT: usize = Var::max_count() % 10;

                    // Overflow check that is fast but still works if LitIdx has the same size as
                    // usize
                    if CAN_OVERFLOW <= self.partial_lit {
                        let carry = (digit <= OVERFLOW_DIGIT) as usize;

                        if CAN_OVERFLOW + carry <= self.partial_lit {
                            self.error = true;
                            return Err(ParserError::LiteralTooLarge {
                                line: self.line_number,
                                index: self.partial_lit,
                                final_digit: digit,
                            });
                        }
                    }

                    self.partial_lit = self.partial_lit * 10 + digit;

                    self.start_of_line = false
                }
                b'-' if !self.negate_next_lit && !self.in_lit => {
                    self.negate_next_lit = true;
                    self.start_of_line = false
                }
                b' ' | b'\n' | b'\r' if !self.negate_next_lit || self.in_lit => {
                    self.finish_literal();
                    self.negate_next_lit = false;
                    self.in_lit = false;
                    self.partial_lit = 0;
                    self.start_of_line = byte != b' ';
                }
                b'c' if self.start_of_line => {
                    self.in_comment_or_header = true;
                }
                b'p' if self.start_of_line && self.header.is_none() => {
                    self.in_comment_or_header = true;
                    self.in_header = true;
                    self.header_line.push(b'p');
                }
                _ => {
                    self.error = true;
                    return Err(ParserError::UnexpectedInput {
                        line: self.line_number,
                        unexpected: byte as char,
                    });
                }
            }
        }

        Ok(())
    }

    /// Finish parsing the input.
    ///
    /// This does not check whether the header information was correct, call
    /// [`check_header`](DimacsParser::check_header) for this.
    pub fn eof(&mut self) -> Result<(), ParserError> {
        if self.in_header {
            self.parse_header_line()?;
        }

        self.finish_literal();

        if !self.partial_clause.is_empty() {
            return Err(ParserError::UnterminatedClause {
                line: self.line_number,
            });
        }

        Ok(())
    }

    /// Verifies the header information when present.
    ///
    /// Does nothing when the input doesn't contain a header.
    pub fn check_header(&self) -> Result<(), ParserError> {
        if let Some(header) = self.header {
            let var_count = self.formula.var_count();
            if var_count != header.var_count {
                return Err(ParserError::VarCount {
                    var_count,
                    header_var_count: header.var_count,
                });
            }

            if self.clause_count != header.clause_count {
                return Err(ParserError::ClauseCount {
                    clause_count: self.clause_count,
                    header_clause_count: header.clause_count,
                });
            }
        }

        Ok(())
    }

    /// Returns the subformula of everything parsed since the last call to this method.
    ///
    /// To parse the whole input into a single [`CnfFormula`](varisat_formula::CnfFormula), simply
    /// call this method once after calling [`eof`](DimacsParser::eof). For incremental parsing this
    /// method can be invoked after each call of [`parse_chunk`](DimacsParser::parse_chunk).
    ///
    /// The variable count of the returned formula will be the maximum of the variable count so far
    /// and the variable count of the header if present.
    pub fn take_formula(&mut self) -> CnfFormula {
        let mut new_formula = CnfFormula::new();
        new_formula.set_var_count(self.formula.var_count());
        replace(&mut self.formula, new_formula)
    }

    /// Return the DIMACS CNF header data if present.
    pub fn header(&self) -> Option<DimacsHeader> {
        self.header
    }

    /// Number of clauses parsed.
    pub fn clause_count(&self) -> usize {
        self.clause_count
    }

    /// Number of variables in the parsed formula.
    pub fn var_count(&self) -> usize {
        self.formula.var_count()
    }

    fn finish_literal(&mut self) {
        if self.in_lit {
            if self.partial_lit == 0 {
                self.formula.add_clause(&self.partial_clause);
                self.partial_clause.clear();
                self.clause_count += 1;
            } else {
                self.partial_clause
                    .push(Var::from_dimacs(self.partial_lit as isize).lit(!self.negate_next_lit));
            }
        }
    }

    fn parse_header_line(&mut self) -> Result<(), ParserError> {
        let header_line = String::from_utf8_lossy(&self.header_line).into_owned();

        if !header_line.starts_with("p ") {
            return self.invalid_header(header_line);
        }

        let mut header_values = header_line[2..].split_whitespace();

        if header_values.next() != Some("cnf") {
            return self.invalid_header(header_line);
        }

        let var_count: usize = match header_values
            .next()
            .and_then(|value| str::parse(value).ok())
        {
            None => return self.invalid_header(header_line),
            Some(value) => value,
        };

        if var_count > Var::max_count() {
            self.error = true;
            return Err(ParserError::LiteralTooLarge {
                line: self.line_number,
                index: var_count / 10,
                final_digit: var_count % 10,
            });
        }

        let clause_count: usize = match header_values
            .next()
            .and_then(|value| str::parse(value).ok())
        {
            None => return self.invalid_header(header_line),
            Some(value) => value,
        };

        if header_values.next().is_some() {
            return self.invalid_header(header_line);
        }

        self.header = Some(DimacsHeader {
            var_count,
            clause_count,
        });

        self.formula.set_var_count(var_count);

        Ok(())
    }

    fn invalid_header(&mut self, header_line: String) -> Result<(), ParserError> {
        self.error = true;
        Err(ParserError::InvalidHeader {
            line: self.line_number,
            header: header_line,
        })
    }
}

/// Write a DIMACS CNF header.
///
/// Can be used with [`write_dimacs_clauses`] to implement incremental writing.
pub fn write_dimacs_header(target: &mut impl io::Write, header: DimacsHeader) -> io::Result<()> {
    writeln!(
        target,
        "p cnf {var_count} {clause_count}",
        var_count = header.var_count,
        clause_count = header.clause_count
    )
}

/// Write an iterator of clauses as headerless DIMACS CNF.
///
/// Can be used with [`write_dimacs_header`] to implement incremental writing.
pub fn write_dimacs_clauses(
    target: &mut impl io::Write,
    clauses: impl IntoIterator<Item = impl IntoIterator<Item = impl Borrow<Lit>>>,
) -> io::Result<()> {
    for clause in clauses.into_iter() {
        for lit in clause.into_iter() {
            itoa::write(&mut *target, lit.borrow().to_dimacs())?;
            target.write_all(b" ")?;
        }
        target.write_all(b"0\n")?;
    }
    Ok(())
}

/// Write a formula as DIMACS CNF.
///
/// Use [`write_dimacs_header`] and [`write_dimacs_clauses`] to implement incremental writing.
pub fn write_dimacs(target: &mut impl io::Write, formula: &CnfFormula) -> io::Result<()> {
    write_dimacs_header(
        &mut *target,
        DimacsHeader {
            var_count: formula.var_count(),
            clause_count: formula.len(),
        },
    )?;
    write_dimacs_clauses(&mut *target, formula.iter())
}

#[cfg(test)]
mod tests {
    use super::*;

    use anyhow::Error;
    use proptest::{test_runner::TestCaseError, *};

    use varisat_formula::{cnf::strategy::*, cnf_formula};

    #[test]
    fn odd_whitespace() -> Result<(), Error> {
        let parsed = DimacsParser::parse(
            b"p  cnf  4   3  \n  1  \n 2  3\n0 -4 0 2\nccomment  \n\n0\n\n" as &[_],
        )?;

        let expected = cnf_formula![
            1, 2, 3;
            -4;
            2;
        ];

        assert_eq!(parsed, expected);

        Ok(())
    }

    macro_rules! expect_error {
        ( $input:expr, $( $cases:tt )* ) => {
            match DimacsParser::parse($input as &[_]) {
                Ok(parsed) => panic!("Expexcted errror but got {:?}", parsed),
                Err(err) => match err.downcast_ref() {
                    Some(casted_err) => match casted_err {
                        $( $cases )*,
                        _ => panic!("Unexpected error {:?}", casted_err),
                    },
                    None => panic!("Unexpected error type {:?}", err),
                }
            }
        };
    }

    #[test]
    fn invalid_headers() {
        expect_error!(b"pcnf 1 3", ParserError::InvalidHeader { .. } => ());
        expect_error!(b"p notcnf 1 3", ParserError::InvalidHeader { .. } => ());
        expect_error!(b"p cnf 1", ParserError::InvalidHeader { .. } => ());
        expect_error!(b"p cnf 1 2 3", ParserError::InvalidHeader { .. } => ());
        expect_error!(b"p cnf foo bar", ParserError::InvalidHeader { .. } => ());
        expect_error!(b"p cnf -3 -6", ParserError::InvalidHeader { .. } => ());

        expect_error!(
            format!("p cnf {} 4", Var::max_var().to_dimacs() + 1).as_bytes(),
            ParserError::LiteralTooLarge { .. } => ()
        );
        DimacsParser::parse(format!("p cnf {} 0", Var::max_var().to_dimacs()).as_bytes()).unwrap();

        expect_error!(b"p cnf 4 18446744073709551616", ParserError::InvalidHeader { .. } => ());

        expect_error!(
            b"p cnf 1 2\np cnf 1 2\n",
            ParserError::UnexpectedInput { unexpected: 'p', .. } => ()
        );
    }

    #[test]
    fn invalid_header_data() {
        expect_error!(
            b"p cnf 1 1\n 2 0",
            ParserError::VarCount { var_count: 2, header_var_count: 1 } => ()
        );

        expect_error!(
            b"p cnf 10 1\n 1 0 0",
            ParserError::ClauseCount { clause_count: 2, header_clause_count: 1 } => ()
        );

        expect_error!(
            b"p cnf 10 4\n 1 0",
            ParserError::ClauseCount { clause_count: 1, header_clause_count: 4 } => ()
        );
    }

    #[test]
    fn syntax_errors() {
        expect_error!(
            b"1 2 ?foo",
            ParserError::UnexpectedInput { unexpected: '?', .. } => ()
        );

        expect_error!(
            b"1 2 - 3 0",
            ParserError::UnexpectedInput { unexpected: ' ', .. } => ()
        );

        expect_error!(
            b"1 2 -\n3 0",
            ParserError::UnexpectedInput { unexpected: '\n', .. } => ()
        );

        expect_error!(
            b"1 2 --3 0",
            ParserError::UnexpectedInput { unexpected: '-', .. } => ()
        );

        expect_error!(
            b"1 2-3 0",
            ParserError::UnexpectedInput { unexpected: '-', .. } => ()
        );
    }

    #[test]
    fn unterminated_clause() {
        expect_error!(
            b"1 2 3",
            ParserError::UnterminatedClause { .. } => ()
        );
    }

    #[test]
    fn literal_too_large() {
        expect_error!(
            format!("1 {} 2 0", Var::max_var().to_dimacs() + 1).as_bytes(),
            ParserError::LiteralTooLarge { .. } => ()
        );

        assert_eq!(
            DimacsParser::parse(format!("1 {} 2 0", Var::max_var().to_dimacs()).as_bytes())
                .unwrap(),
            cnf_formula![
                1, Var::max_var().to_dimacs(), 2;
            ]
        );
    }

    proptest! {

        #[test]
        fn roundtrip(input in cnf_formula(1..100usize, 0..1000, 0..10)) {
            let mut buf = vec![];

            write_dimacs(&mut buf, &input)?;

            let parsed = DimacsParser::parse(&buf[..]).map_err(|e| TestCaseError::fail(e.to_string()))?;

            prop_assert_eq!(parsed, input);
        }
    }
}