diff --git a/Cargo.toml b/Cargo.toml index 6de1e89..17de9ab 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -16,4 +16,8 @@ travis-ci = { repository = "jethrogb/rust-cexpr" } nom = { version = "7", default-features = false, features = ["std"] } [dev-dependencies] -clang-sys = ">= 0.13.0, < 0.29.0" +clang = { version = "2", features = ["runtime"] } + +[patch.crates-io] +clang-sys = { git = "https://github.com/reitermarkus/clang-sys", branch = "load-api" } +clang = { git = "https://github.com/reitermarkus/clang-rs", branch = "load-api" } diff --git a/src/expr.rs b/src/expr.rs index 7f7e458..03993cc 100644 --- a/src/expr.rs +++ b/src/expr.rs @@ -29,8 +29,8 @@ use std::ops::{ use crate::literal::{self, CChar}; use crate::token::{Kind as TokenKind, Token}; use crate::ToCexprResult; -use nom::branch::alt; -use nom::combinator::{complete, map, map_opt}; +use nom::branch::{alt, permutation}; +use nom::combinator::{complete, map, map_opt, opt}; use nom::multi::{fold_many0, many0, separated_list0}; use nom::sequence::{delimited, pair, preceded}; use nom::*; @@ -54,6 +54,7 @@ pub enum EvalResult { Float(f64), Char(CChar), Str(Vec<u8>), + Cast(Vec<Vec<u8>>, Box<EvalResult>), Invalid, } @@ -121,11 +122,17 @@ fn identifier_token(input: &[Token]) -> CResult<'_, &[u8]> { if input[0].kind == TokenKind::Identifier { Ok((&input[1..], &input[0].raw[..])) } else { - Err(crate::nom::Err::Error((input, crate::ErrorKind::TypedToken(TokenKind::Identifier)).into())) + Err(crate::nom::Err::Error( + (input, crate::ErrorKind::TypedToken(TokenKind::Identifier)).into(), + )) } } } +fn keyword(c: &'static str) -> impl Fn(&[Token]) -> CResult<'_, &[u8]> { + exact_token!(Keyword, c.as_bytes()) +} + fn p(c: &'static str) -> impl Fn(&[Token]) -> CResult<'_, &[u8]> { exact_token!(Punctuation, c.as_bytes()) } @@ -289,6 +296,10 @@ where nom::combinator::map_opt(f, EvalResult::as_numeric) } +fn expr_cast(input: (Vec<Vec<u8>>, EvalResult)) -> EvalResult { + EvalResult::Cast(input.0, Box::new(input.1)) +} + impl<'a> PRef<'a> { fn unary(self,
input: &'_ [Token]) -> CResult<'_, EvalResult> { alt(( @@ -473,6 +484,7 @@ impl<'a> PRef<'a> { fn expr(self, input: &'_ [Token]) -> CResult<'_, EvalResult> { alt(( + map(pair(|i| self.cast(i), |i| self.expr(i)), expr_cast), |i| self.numeric_expr(i), delimited(p("("), |i| self.expr(i), p(")")), |i| self.concat_str(i), @@ -482,6 +494,103 @@ impl<'a> PRef<'a> { .to_cexpr_result() } + fn cast(self, input: &'_ [Token]) -> CResult<'_, Vec<Vec<u8>>> { + delimited(p("("), |i| self.ty(i), p(")"))(input) + } + + fn int_ty(input: &'_ [Token]) -> CResult<'_, Vec<&[u8]>> { + fn int_signedness(input: &'_ [Token]) -> CResult<'_, &[u8]> { + alt((keyword("unsigned"), keyword("signed")))(input) + } + + fn int_longness(input: &'_ [Token]) -> CResult<'_, &[u8]> { + alt((keyword("short"), keyword("long")))(input) + } + + alt(( + // [const] [(un)signed] long long [int] + map( + permutation(( + opt(keyword("const")), + opt(int_signedness), + keyword("long"), + keyword("long"), + opt(keyword("int")), + )), + |(_, s, i1, i2, _)| { + if let Some(s) = s { + if s == b"signed" { + vec![i1, i2] + } else { + vec![s, i1, i2] + } + } else { + vec![i1, i2] + } + }, + ), + // [const] [(un)signed] long/short [int] + map( + permutation(( + opt(keyword("const")), + opt(int_signedness), + int_longness, + opt(keyword("int")), + )), + |(_, s, i, _)| { + if let Some(s) = s { + if s == b"signed" { + vec![i] + } else { + vec![s, i] + } + } else { + vec![i] + } + }, + ), + // [const] [(un)signed] char/int + map( + permutation(( + opt(keyword("const")), + opt(int_signedness), + alt((keyword("char"), keyword("int"))), + )), + |(_, s, i)| { + if let Some(s) = s { + if s == b"signed" && i == b"int" { + vec![i] + } else { + vec![s, i] + } + } else { + vec![i] + } + }, + ), + ))(input) + } + + fn ty(self, input: &'_ [Token]) -> CResult<'_, Vec<Vec<u8>>> { + map( + alt(( + // [const] + map( + permutation((opt(keyword("const")), identifier_token)), + |(_, id)| vec![id], + ), + // [const] bool + map( +
permutation((opt(keyword("const")), keyword("bool"))), + |(_, b)| vec![b], + ), + Self::int_ty, + )), + |v| v.into_iter().map(|t| t.to_vec()).collect(), + )(input) + .to_cexpr_result() + } + fn macro_definition(self, input: &'_ [Token]) -> CResult<'_, (&'_ [u8], EvalResult)> { pair(identifier_token, |i| self.expr(i))(input) } @@ -601,10 +710,6 @@ pub fn macro_definition(input: &[Token]) -> CResult<'_, (&'_ [u8], EvalResult)> pub fn fn_macro_declaration(input: &[Token]) -> CResult<'_, (&[u8], Vec<&[u8]>)> { pair( identifier_token, - delimited( - p("("), - separated_list0(p(","), identifier_token), - p(")"), - ), + delimited(p("("), separated_list0(p(","), identifier_token), p(")")), )(input) } diff --git a/src/lib.rs b/src/lib.rs index 5170f97..0f98335 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -19,7 +19,7 @@ pub mod nom { //! nom's result types, re-exported. - pub use nom::{error::ErrorKind, error::Error, Err, IResult, Needed}; + pub use nom::{error::Error, error::ErrorKind, Err, IResult, Needed}; } pub mod expr; pub mod literal; diff --git a/tests/clang.rs b/tests/clang.rs index b2484f0..fb55844 100644 --- a/tests/clang.rs +++ b/tests/clang.rs @@ -6,18 +6,18 @@ // option. This file may not be copied, modified, or distributed // except according to those terms. 
extern crate cexpr; -extern crate clang_sys; use std::collections::HashMap; use std::io::Write; use std::str::{self, FromStr}; -use std::{char, ffi, mem, ptr, slice}; +use std::char; +use std::num::Wrapping; use cexpr::assert_full_parse; use cexpr::expr::{fn_macro_declaration, EvalResult, IdentifierParser}; use cexpr::literal::CChar; use cexpr::token::Token; -use clang_sys::*; +use clang::{source::SourceRange, token::TokenKind, EntityKind}; // main testing routine fn test_definition( @@ -25,18 +25,17 @@ tokens: &[Token], idents: &mut HashMap<Vec<u8>, EvalResult>, ) -> bool { + use cexpr::expr::EvalResult::*; + fn bytes_to_int(value: &[u8]) -> Option<EvalResult> { - str::from_utf8(value) - .ok() - .map(|s| s.replace("n", "-")) - .map(|s| s.replace("_", "")) - .and_then(|v| i64::from_str(&v).ok()) - .map(::std::num::Wrapping) + let s = str::from_utf8(value).ok()?; + let s = s.rsplit_once('_').map(|(_, s)| s).unwrap_or(s); + + i64::from_str(&s.replace("n", "-")).ok() + .map(Wrapping) .map(Int) } - use cexpr::expr::EvalResult::*; - let display_name = String::from_utf8_lossy(&ident).into_owned(); let functional; @@ -72,6 +71,21 @@ fn test_definition( s.extend_from_slice(rest); } Some(Str(s)) + } else if expected == b"Cast" { + str::from_utf8(value).ok().and_then(|s| { + let (ty, value) = s.rsplit_once("_Int_")?; + + let ty = ty.split("_").filter_map(|t| { + if t == "const" || t == "signed" { + None + } else { + Some(t.as_bytes().to_vec()) + } + }).collect::<Vec<Vec<u8>>>(); + let int = bytes_to_int(value.as_bytes())?; + + Some(Cast(ty, Box::new(int))) + }) } else if expected == b"Int" { bytes_to_int(value) } else if expected == b"Float" { @@ -127,6 +141,7 @@ fn test_definition( return false; } } + assert_full_parse(IdentifierParser::new(&fnidents).expr(&expr_tokens)) } else { IdentifierParser::new(idents) @@ -163,162 +178,79 @@ } } -// support code for the clang lexer -unsafe fn clang_str_to_vec(s: CXString) -> Vec<u8> { - let vec =
ffi::CStr::from_ptr(clang_getCString(s)) - .to_bytes() - .to_owned(); - clang_disposeString(s); - vec -} - -#[allow(non_upper_case_globals)] -unsafe fn token_clang_to_cexpr(tu: CXTranslationUnit, orig: &CXToken) -> Token { +fn token_clang_to_cexpr(token: &clang::token::Token) -> Token { Token { - kind: match clang_getTokenKind(*orig) { - CXToken_Comment => cexpr::token::Kind::Comment, - CXToken_Identifier => cexpr::token::Kind::Identifier, - CXToken_Keyword => cexpr::token::Kind::Keyword, - CXToken_Literal => cexpr::token::Kind::Literal, - CXToken_Punctuation => cexpr::token::Kind::Punctuation, - _ => panic!("invalid token kind: {:?}", *orig), + kind: match token.get_kind() { + TokenKind::Comment => cexpr::token::Kind::Comment, + TokenKind::Identifier => cexpr::token::Kind::Identifier, + TokenKind::Keyword => cexpr::token::Kind::Keyword, + TokenKind::Literal => cexpr::token::Kind::Literal, + TokenKind::Punctuation => cexpr::token::Kind::Punctuation, }, - raw: clang_str_to_vec(clang_getTokenSpelling(tu, *orig)).into_boxed_slice(), + raw: token.get_spelling().into_bytes().into_boxed_slice(), } } -extern "C" fn visit_children_thunk<F>( - cur: CXCursor, - parent: CXCursor, - closure: CXClientData, -) -> CXChildVisitResult -where - F: FnMut(CXCursor, CXCursor) -> CXChildVisitResult, -{ - unsafe { (&mut *(closure as *mut F))(cur, parent) } -} - -unsafe fn visit_children<F>(cursor: CXCursor, mut f: F) -where - F: FnMut(CXCursor, CXCursor) -> CXChildVisitResult, -{ - clang_visitChildren( - cursor, - visit_children_thunk::<F> as _, - &mut f as *mut F as CXClientData, - ); -} - -unsafe fn location_in_scope(r: CXSourceRange) -> bool { - let start = clang_getRangeStart(r); - let mut file = ptr::null_mut(); - clang_getSpellingLocation( - start, - &mut file, - ptr::null_mut(), - ptr::null_mut(), - ptr::null_mut(), - ); - clang_Location_isFromMainFile(start) != 0 - && clang_Location_isInSystemHeader(start) == 0 - && file != ptr::null_mut() +fn location_in_scope(r: &SourceRange) -> bool {
+ let start = r.get_start(); + let location = start.get_spelling_location(); + start.is_in_main_file() && !start.is_in_system_header() && location.file.is_some() } -/// tokenize_range_adjust can be used to work around LLVM bug 9069 -/// https://bugs.llvm.org//show_bug.cgi?id=9069 fn file_visit_macros<F: FnMut(Vec<u8>, Vec<Token>)>( file: &str, - tokenize_range_adjust: bool, mut visitor: F, ) { - unsafe { - let tu = { - let index = clang_createIndex(true as _, false as _); - let cfile = ffi::CString::new(file).unwrap(); - let mut tu = mem::MaybeUninit::uninit(); - assert!( - clang_parseTranslationUnit2( - index, - cfile.as_ptr(), - [b"-std=c11\0".as_ptr() as *const ::std::os::raw::c_char].as_ptr(), - 1, - ptr::null_mut(), - 0, - CXTranslationUnit_DetailedPreprocessingRecord, - &mut *tu.as_mut_ptr() - ) == CXError_Success, - "Failure reading test case {}", - file - ); - tu.assume_init() - }; - visit_children(clang_getTranslationUnitCursor(tu), |cur, _parent| { - if cur.kind == CXCursor_MacroDefinition { - let mut range = clang_getCursorExtent(cur); - if !location_in_scope(range) { - return CXChildVisit_Continue; - } - range.end_int_data -= if tokenize_range_adjust { 1 } else { 0 }; - let mut token_ptr = ptr::null_mut(); - let mut num = 0; - clang_tokenize(tu, range, &mut token_ptr, &mut num); - if token_ptr != ptr::null_mut() { - let tokens = slice::from_raw_parts(token_ptr, num as usize); - let tokens: Vec<_> = tokens - .iter() - .filter_map(|t| { - if clang_getTokenKind(*t) != CXToken_Comment { - Some(token_clang_to_cexpr(tu, t)) - } else { - None - } - }) - .collect(); - clang_disposeTokens(tu, token_ptr, num); - visitor(clang_str_to_vec(clang_getCursorSpelling(cur)), tokens) - } + let clang = clang::Clang::new().unwrap(); + + let index = clang::Index::new(&clang, false, true); + + let tu = index + .parser(file) + .arguments(&["-std=c11"]) + .detailed_preprocessing_record(true) + .skip_function_bodies(true) + .parse() + .unwrap(); + + let entity = tu.get_entity(); +
entity.visit_children(|cur, _parent| { + if cur.get_kind() == EntityKind::MacroDefinition { + let range = cur.get_range().unwrap(); + if !location_in_scope(&range) { + return clang::EntityVisitResult::Continue; } - CXChildVisit_Continue - }); - clang_disposeTranslationUnit(tu); - }; + + let tokens: Vec<_> = range + .tokenize() + .into_iter() + .filter_map(|token| { + if token.get_kind() == TokenKind::Comment { + return None; + } + + Some(token_clang_to_cexpr(&token)) + }) + .collect(); + + let display_name = cur.get_display_name().unwrap(); + visitor(display_name.into_bytes(), tokens) + } + + clang::EntityVisitResult::Continue + }); } fn test_file(file: &str) -> bool { let mut idents = HashMap::new(); let mut all_succeeded = true; - file_visit_macros(file, fix_bug_9069(), |ident, tokens| { + file_visit_macros(file, |ident, tokens| { all_succeeded &= test_definition(ident, &tokens, &mut idents) }); all_succeeded } -fn fix_bug_9069() -> bool { - fn check_bug_9069() -> bool { - let mut token_sets = vec![]; - file_visit_macros( - "tests/input/test_llvm_bug_9069.h", - false, - |ident, tokens| { - assert_eq!(&ident, b"A"); - token_sets.push(tokens); - }, - ); - assert_eq!(token_sets.len(), 2); - token_sets[0] != token_sets[1] - } - - use std::sync::atomic::{AtomicBool, Ordering}; - use std::sync::Once; - - static CHECK_FIX: Once = Once::new(); - static FIX: AtomicBool = AtomicBool::new(false); - - CHECK_FIX.call_once(|| FIX.store(check_bug_9069(), Ordering::SeqCst)); - - FIX.load(Ordering::SeqCst) -} - macro_rules! 
test_file { ($f:ident) => { #[test] diff --git a/tests/input/fail.h b/tests/input/fail.h index fd416bc..fb59128 100644 --- a/tests/input/fail.h +++ b/tests/input/fail.h @@ -1,4 +1,3 @@ -#define FAIL_function_like(x) 3 #define FAIL_empty #define FAIL_invalid_for_radix 0b2 #define FAIL_shift_by_float 3<<1f diff --git a/tests/input/int_signed.h b/tests/input/int_signed.h index 65854a6..8ee9b9e 100644 --- a/tests/input/int_signed.h +++ b/tests/input/int_signed.h @@ -1,3 +1,25 @@ #define Int_n3 -(-(-3)) #define Int_n5 -3-2 #define Int_n9223372036854775808 -9223372036854775808 + +#define Fn_Int_n9(_3) _3*-3 + +#define Cast_short_Int_n6 (short) -6 +#define Cast_signed_short_Int_n6 (signed short) -6 +#define Cast_const_short_Int_n6 (const short) -6 +#define Cast_const_signed_short_Int_n6 (const signed short) -6 + +#define Cast_int_Int_n6 (int) -6 +#define Cast_signed_int_Int_n6 (signed int) -6 +#define Cast_const_int_Int_n6 (const int) -6 +#define Cast_const_signed_int_Int_n6 (const signed int) -6 + +#define Cast_long_Int_n6 (long) -6 +#define Cast_signed_long_Int_n6 (signed long) -6 +#define Cast_const_long_Int_n6 (const long) -6 +#define Cast_const_signed_long_Int_n6 (const signed long) -6 + +#define Cast_long_long_Int_n6 (long long) -6 +#define Cast_signed_long_long_Int_n6 (signed long long) -6 +#define Cast_const_long_long_Int_n6 (const long long) -6 +#define Cast_const_signed_long_long_Int_n6 (const signed long long) -6 diff --git a/tests/input/int_unsigned.h b/tests/input/int_unsigned.h index 6663dda..c38c9ae 100644 --- a/tests/input/int_unsigned.h +++ b/tests/input/int_unsigned.h @@ -10,7 +10,7 @@ #define Int_124 124u #define Int_125 125uL #define Int_126 126LuL -#define Int_16 (((1)<<4ULL))/*comment*/ +#define Int_16 (((1)<<4ULL))/*comment*/ #define Int_13 1|8^6&2<<1 #define Int_47 32|15 @@ -27,3 +27,16 @@ #define Int_n9223372036854775808 9223372036854775808 #define Fn_Int_9(_3) _3*3 +#define Fn_Int_unused_arg_3(x_unused_0) 3 + +#define Cast_unsigned_short_Int_6 
(unsigned short) 6 +#define Cast_const_unsigned_short_Int_6 (const unsigned short) 6 + +#define Cast_unsigned_int_Int_6 (unsigned int) 6 +#define Cast_const_unsigned_int_Int_6 (const unsigned int) 6 + +#define Cast_unsigned_long_Int_6 (unsigned long) 6 +#define Cast_const_unsigned_long_Int_6 (const unsigned long) 6 + +#define Cast_unsigned_long_long_Int_6 (unsigned long long) 6 +#define Cast_const_unsigned_long_long_Int_6 (const unsigned long long) 6