Skip to content

Commit 26c536f

Browse files
authored
Add basic support for gensub function (#89)
* Add basic support for gensub function * Handle errors without panics (just print warnings) * Use strtoi to parse integer in gen_subst_dynamic so that we wouldn't have to convert to &str * Fix after review
1 parent 3a25726 commit 26c536f

11 files changed

Lines changed: 285 additions & 3 deletions

File tree

src/builtins.rs

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -35,6 +35,7 @@ pub enum Function {
3535
SubstrIndex,
3636
Sub,
3737
GSub,
38+
GenSub,
3839
EscapeCSV,
3940
EscapeTSV,
4041
JoinCols,
@@ -203,6 +204,7 @@ static_map!(
203204
["match", Function::Match],
204205
["sub", Function::Sub],
205206
["gsub", Function::GSub],
207+
["gensub", Function::GenSub],
206208
["substr", Function::Substr],
207209
["int", Function::ToInt],
208210
["hex", Function::HexToInt],
@@ -298,6 +300,7 @@ impl Function {
298300
ctx.nw.add_dep(v, arr, Constraint::ValIn(()));
299301
ctx.nw.add_dep(arr, v, Constraint::Val(()));
300302
}
303+
// TODO: GenSub?
301304
Function::Sub | Function::GSub => {
302305
let out_str = args[2];
303306
let str_const = ctx.constant(Scalar(BaseTy::Str).abs());
@@ -425,6 +428,7 @@ impl Function {
425428
Length => (smallvec![incoming[0]], Int),
426429
Close => (smallvec![Str], Str),
427430
Sub | GSub => (smallvec![Str, Str, Str], Int),
431+
GenSub => (smallvec![Str, Str, Str, Str], Str),
428432
ToUpper | ToLower | EscapeCSV | EscapeTSV => (smallvec![Str], Str),
429433
Substr => (smallvec![Str, Int, Int], Str),
430434
Match => (smallvec![Str, Str], Int),
@@ -456,6 +460,7 @@ impl Function {
456460
SetFI | SubstrIndex | Match | Setcol | Binop(_) => 2,
457461
JoinCSV | JoinTSV | Delete | Contains => 2,
458462
IncMap | JoinCols | Substr | Sub | GSub | Split => 3,
463+
GenSub => 4,
459464
})
460465
}
461466

@@ -493,7 +498,7 @@ impl Function {
493498
| ReadErrCmd | ReadErrStdin | Contains | Delete | Match | Sub | GSub | ToInt
494499
| System | HexToInt => Ok(Scalar(BaseTy::Int).abs()),
495500
ToUpper | ToLower | JoinCSV | JoinTSV | JoinCols | EscapeCSV | EscapeTSV | Substr
496-
| Unop(Column) | Binop(Concat) | Nextline | NextlineCmd | NextlineStdin => {
501+
| Unop(Column) | Binop(Concat) | Nextline | NextlineCmd | NextlineStdin | GenSub => {
497502
Ok(Scalar(BaseTy::Str).abs())
498503
}
499504
IncMap => Ok(step_arith(&types::val_of(&args[0])?, &args[2])),

src/bytecode.rs

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -133,6 +133,13 @@ pub(crate) enum Instr<'a> {
133133
/*for*/ Reg<Str<'a>>,
134134
/*in*/ Reg<Str<'a>>,
135135
),
136+
GenSubDynamic(
137+
Reg<Str<'a>>,
138+
/*pat*/ Reg<Str<'a>>,
139+
/*for*/ Reg<Str<'a>>,
140+
/*how*/ Reg<Str<'a>>,
141+
/*in*/ Reg<Str<'a>>,
142+
),
136143
EscapeCSV(Reg<Str<'a>>, Reg<Str<'a>>),
137144
EscapeTSV(Reg<Str<'a>>, Reg<Str<'a>>),
138145
Substr(Reg<Str<'a>>, Reg<Str<'a>>, Reg<Int>, Reg<Int>),
@@ -558,6 +565,13 @@ impl<'a> Instr<'a> {
558565
s.accum(&mut f);
559566
in_s.accum(&mut f);
560567
}
568+
GenSubDynamic(res, pat, s, how, in_s) => {
569+
res.accum(&mut f);
570+
pat.accum(&mut f);
571+
s.accum(&mut f);
572+
how.accum(&mut f);
573+
in_s.accum(&mut f);
574+
}
561575
EscapeCSV(res, s) | EscapeTSV(res, s) => {
562576
res.accum(&mut f);
563577
s.accum(&mut f);

src/cfg.rs

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1737,6 +1737,14 @@ where
17371737
}?;
17381738
return Ok((next, PrimExpr::Val(PrimVal::Var(res))));
17391739
}
1740+
1741+
if builtins::Function::GenSub == bi && args.len() == 3 {
1742+
// If a fourth argument isn't provided, we assume you mean $0.
1743+
let e = &Expr::Unop(ast::Unop::Column, &Expr::ILit(0));
1744+
let (next, v) = self.convert_val(e, open)?;
1745+
open = next;
1746+
prim_args.push(v);
1747+
}
17401748
return Ok((open, PrimExpr::CallBuiltin(bi, prim_args)));
17411749
}
17421750
}

src/codegen/intrinsics.rs

Lines changed: 23 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -112,6 +112,7 @@ pub(crate) fn register_all(cg: &mut impl Backend) -> Result<()> {
112112
[ReadOnly] substr_index(str_ref_ty, str_ref_ty) -> int_ty;
113113
subst_first(rt_ty, str_ref_ty, str_ref_ty, str_ref_ty) -> int_ty;
114114
subst_all(rt_ty, str_ref_ty, str_ref_ty, str_ref_ty) -> int_ty;
115+
gen_subst(rt_ty, str_ref_ty, str_ref_ty, str_ref_ty, str_ref_ty) -> str_ty;
115116
escape_csv(str_ref_ty) -> str_ty;
116117
escape_tsv(str_ref_ty) -> str_ty;
117118
substr(str_ref_ty, int_ty, int_ty) -> str_ty;
@@ -833,6 +834,28 @@ pub(crate) unsafe extern "C" fn subst_all(
833834
nsubs
834835
}
835836

837+
pub(crate) unsafe extern "C" fn gen_subst(
838+
runtime: *mut c_void,
839+
pat: *mut U128,
840+
s: *mut U128,
841+
how: *mut U128,
842+
in_s: *mut U128,
843+
) -> U128 {
844+
let runtime = &mut *(runtime as *mut Runtime);
845+
let s = &mut *(s as *mut Str);
846+
let pat = &*(pat as *mut Str);
847+
let how = &*(how as *mut Str);
848+
let in_s = &mut *(in_s as *mut Str);
849+
let subbed = try_abort!(
850+
runtime,
851+
runtime
852+
.core
853+
.regexes
854+
.with_regex(pat, |re| in_s.gen_subst_dynamic(re, s, how))
855+
);
856+
mem::transmute::<Str, U128>(subbed)
857+
}
858+
836859
pub(crate) unsafe extern "C" fn escape_csv(s: *mut U128) -> U128 {
837860
mem::transmute::<Str, U128>(runtime::escape_csv(&*(s as *mut Str)))
838861
}

src/codegen/mod.rs

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -729,6 +729,16 @@ pub(crate) trait CodeGenerator: Backend {
729729
self.call_intrinsic(intrinsic!(subst_all), &mut [rt, patv, sv, in_sv])?;
730730
self.bind_val(res.reflect(), resv)
731731
}
732+
GenSubDynamic(res, pat, s, how, in_s) => {
733+
let rt = self.runtime_val();
734+
let patv = self.get_val(pat.reflect())?;
735+
let sv = self.get_val(s.reflect())?;
736+
let howv = self.get_val(how.reflect())?;
737+
let in_sv = self.get_val(in_s.reflect())?;
738+
let resv =
739+
self.call_intrinsic(intrinsic!(gen_subst), &mut [rt, patv, sv, howv, in_sv])?;
740+
self.bind_val(res.reflect(), resv)
741+
}
732742
EscapeCSV(dst, s) => self.unop(intrinsic!(escape_csv), dst, s),
733743
EscapeTSV(dst, s) => self.unop(intrinsic!(escape_tsv), dst, s),
734744
Substr(res, base, l, r) => {

src/compile.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1505,6 +1505,18 @@ impl<'a, 'b> View<'a, 'b> {
15051505
conv_regs[2].into(),
15061506
))
15071507
}
1508+
GenSub => {
1509+
if res_reg != UNUSED {
1510+
// TODO: emit specialized versions of GenSub (how to inspect constants?)
1511+
self.pushl(LL::GenSubDynamic(
1512+
res_reg.into(),
1513+
conv_regs[0].into(),
1514+
conv_regs[1].into(),
1515+
conv_regs[2].into(),
1516+
conv_regs[3].into(),
1517+
));
1518+
}
1519+
}
15081520
EscapeCSV => {
15091521
if res_reg != UNUSED {
15101522
self.pushl(LL::EscapeCSV(res_reg.into(), conv_regs[0].into()))

src/dataflow.rs

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -254,6 +254,12 @@ pub(crate) mod boilerplate {
254254
f(dstin.into(), Some(x.into()));
255255
f(dstin.into(), Some(y.into()));
256256
}
257+
GenSubDynamic(dst, pat, s, how, in_s) => {
258+
f(dst.into(), Some(pat.into()));
259+
f(dst.into(), Some(s.into()));
260+
f(dst.into(), Some(how.into()));
261+
f(dst.into(), Some(in_s.into()));
262+
}
257263
EscapeTSV(dst, src) | EscapeCSV(dst, src) => f(dst.into(), Some(src.into())),
258264
Substr(dst, x, y, z) => {
259265
f(dst.into(), Some(x.into()));

src/display.rs

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -171,6 +171,7 @@ impl Display for Function {
171171
SubstrIndex => write!(f, "index"),
172172
Sub => write!(f, "sub"),
173173
GSub => write!(f, "gsub"),
174+
GenSub => write!(f, "gensub"),
174175
EscapeCSV => write!(f, "escape_csv"),
175176
EscapeTSV => write!(f, "escape_tsv"),
176177
JoinCSV => write!(f, "join_csv"),

src/harness.rs

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1399,6 +1399,38 @@ this as well"#
13991399
@input "6.18163e-27\n1.80782e-40\n2.38296e-05\n1.92843e-09\n7.37465e-39\n"
14001400
);
14011401

1402+
test_program!(
1403+
gensub_basic,
1404+
r#"{$0 = gensub("(Hello), ([a-zA-Z]+)", "\\2", "g", $0)}; {print}"#,
1405+
"Joe\nNick\nBye, Joe\nRick\nBye, Rich\n",
1406+
@input "Hello, Joe\nHello, Nick\nBye, Joe\nHello, Rick\nBye, Rich\n"
1407+
);
1408+
1409+
test_program!(
1410+
gensub_first,
1411+
r#"BEGIN { v = "1234"; v = gensub("([0-9])([0-9])", "\\2", "1", v); print v}"#,
1412+
"234\n"
1413+
);
1414+
1415+
test_program!(
1416+
gensub_second,
1417+
r#"BEGIN { v = "1234"; v = gensub("([0-9])([0-9])", "\\2", "2", v); print v}"#,
1418+
"124\n"
1419+
);
1420+
1421+
test_program!(
1422+
gensub_third, // this matches nothing
1423+
r#"BEGIN { v = "1234"; v = gensub("([0-9])([0-9])", "\\2", "3", v); print v}"#,
1424+
"1234\n"
1425+
);
1426+
1427+
test_program!(
1428+
gensub_on_input, // fourth argument omitted, so gensub operates on $0
1429+
r#"{print gensub("a", "b", "g")}"#,
1430+
"bbobb\n",
1431+
@input "aboba\n"
1432+
);
1433+
14021434
// TODO test more operators, consider more edge cases around functions
14031435
}
14041436

src/interp.rs

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -883,6 +883,18 @@ impl<'a, LR: LineReader> Interp<'a, LR> {
883883
*index_mut(&mut self.strs, in_s) = subbed;
884884
*index_mut(&mut self.ints, res) = subs_made;
885885
}
886+
GenSubDynamic(res, pat, s, how, in_s) => {
887+
let subbed = {
888+
let pat = index(&self.strs, pat);
889+
let s = index(&self.strs, s);
890+
let how = index(&self.strs, how);
891+
let in_s = index(&self.strs, in_s);
892+
self.core
893+
.regexes
894+
.with_regex(pat, |re| in_s.gen_subst_dynamic(re, s, how))?
895+
};
896+
*index_mut(&mut self.strs, res) = subbed;
897+
}
886898
EscapeCSV(res, s) => {
887899
*index_mut(&mut self.strs, res) = {
888900
let s = index(&self.strs, s);

0 commit comments

Comments
 (0)